/*
 * drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 * (from the linux tree, merge of 'for-4.20-fixes' into 'for-4.21')
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #include "ivsrcid/ivsrcid_vislands30.h"
55
/* Ring/queue sizing for the GFX v8 block. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden values for GB_ADDR_CONFIG. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field encoders for GB_TILE_MODEn / GB_MACROTILE_MODEn register values. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Bit masks for RLC_CGTT_MGCG_OVERRIDE (clock-gating override controls). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14
97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
122
123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
129
130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
153
154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
165
166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
172
173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
174 {
175         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
176         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
177         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
178         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
179         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
180         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
181         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
182         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
183         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
184         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
185         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
186         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
187         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
188         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
189         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
190         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
191 };
192
193 static const u32 golden_settings_tonga_a11[] =
194 {
195         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
196         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
197         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
198         mmGB_GPU_ID, 0x0000000f, 0x00000000,
199         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
200         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
201         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
202         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
203         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
204         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
205         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
206         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
207         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
208         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
209         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
210         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
211 };
212
213 static const u32 tonga_golden_common_all[] =
214 {
215         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
216         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
217         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
218         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
219         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
220         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
221         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
222         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
223 };
224
225 static const u32 tonga_mgcg_cgcg_init[] =
226 {
227         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
228         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
229         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
234         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
236         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
238         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
245         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
249         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
250         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
251         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
252         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
253         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
254         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
256         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
258         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
261         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
266         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
271         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
274         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
275         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
276         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
277         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
278         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
279         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
280         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
281         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
282         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
283         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
284         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
285         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
286         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
287         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
288         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
289         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
290         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
291         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
292         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
293         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
294         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
295         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
296         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
297         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
298         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
299         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
300         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
301         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
302 };
303
304 static const u32 golden_settings_vegam_a11[] =
305 {
306         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
307         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
308         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
309         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
310         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
311         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
312         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
313         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
314         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
315         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
316         mmSQ_CONFIG, 0x07f80000, 0x01180000,
317         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
318         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
319         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
320         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
321         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
322         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 };
324
325 static const u32 vegam_golden_common_all[] =
326 {
327         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
333 };
334
335 static const u32 golden_settings_polaris11_a11[] =
336 {
337         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
338         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
339         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
340         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
341         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
342         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
343         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
344         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
345         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
346         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
347         mmSQ_CONFIG, 0x07f80000, 0x01180000,
348         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
351         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
352         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
353         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
354 };
355
356 static const u32 polaris11_golden_common_all[] =
357 {
358         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
360         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
361         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
362         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
363         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
364 };
365
366 static const u32 golden_settings_polaris10_a11[] =
367 {
368         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
369         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
370         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
371         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
372         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
373         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
374         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
375         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
376         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
377         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
378         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
379         mmSQ_CONFIG, 0x07f80000, 0x07180000,
380         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
381         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
382         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
383         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
384         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
385 };
386
387 static const u32 polaris10_golden_common_all[] =
388 {
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
391         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
392         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
393         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
394         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
395         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
396         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
397 };
398
399 static const u32 fiji_golden_common_all[] =
400 {
401         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
402         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
403         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
404         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
405         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
406         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
407         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
408         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
409         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
410         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
411 };
412
413 static const u32 golden_settings_fiji_a10[] =
414 {
415         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
416         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
417         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
420         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
421         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
422         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
423         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
424         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
425         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
426 };
427
428 static const u32 fiji_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
462         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
463         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
464         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
465 };
466
467 static const u32 golden_settings_iceland_a11[] =
468 {
469         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
470         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
471         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
472         mmGB_GPU_ID, 0x0000000f, 0x00000000,
473         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
474         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
475         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
476         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
477         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
478         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
479         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
480         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
481         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
482         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
483         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
484         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
485 };
486
487 static const u32 iceland_golden_common_all[] =
488 {
489         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
491         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
492         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
493         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
494         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
495         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
496         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
497 };
498
499 static const u32 iceland_mgcg_cgcg_init[] =
500 {
501         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
502         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
503         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
504         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
505         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
506         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
507         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
508         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
510         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
511         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
512         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
519         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
523         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
524         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
525         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
526         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
527         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
528         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
530         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
532         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
533         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
534         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
535         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
536         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
537         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
538         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
539         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
540         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
541         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
542         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
543         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
544         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
545         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
546         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
547         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
548         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
549         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
550         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
551         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
552         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
555         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
563         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
564         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
565 };
566
567 static const u32 cz_golden_settings_a11[] =
568 {
569         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
570         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
571         mmGB_GPU_ID, 0x0000000f, 0x00000000,
572         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
573         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
574         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
575         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
576         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
577         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
578         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
579         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
580         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
581 };
582
583 static const u32 cz_golden_common_all[] =
584 {
585         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
586         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
587         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
588         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
589         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
590         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
591         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
592         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
593 };
594
595 static const u32 cz_mgcg_cgcg_init[] =
596 {
597         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 };
673
/* Stoney revision-specific "golden" register overrides.  Rows of three
 * u32s — register offset, mask, value — consumed by
 * amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers(). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
687
/* Stoney common "golden" settings (raster/addressing/SPI reservation),
 * same {register, mask, value} triple format as the other golden tables,
 * applied for all Stoney parts in gfx_v8_0_init_golden_registers(). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
699
/* Stoney medium-grain / coarse-grain clockgating init values,
 * {register, mask, value} triples programmed before the other Stoney
 * golden tables in gfx_v8_0_init_golden_registers(). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
708
709
/* Human-readable descriptions of the SQ EDC error-source values, one
 * string per SQ_EDC_INFO_SOURCE_* code (presumably indexed directly by
 * the raw source field from the hardware — confirm against the users
 * of this table further down the file). */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
719
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
728
/*
 * gfx_v8_0_init_golden_registers - program ASIC-specific "golden" settings
 * @adev: amdgpu device
 *
 * Applies the per-ASIC register override tables (clockgating init,
 * revision-specific settings, common settings) through
 * amdgpu_device_program_register_sequence().  The order of the tables
 * within each case is deliberate.  ASIC types without golden tables
 * fall through to the empty default.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific i2c writes for certain Polaris10 SKUs,
		 * matched by PCI revision and subsystem IDs.  The register
		 * values are opaque here; presumably a VBIOS/board quirk —
		 * the exact purpose is not documented in this file. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
825
826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
827 {
828         adev->gfx.scratch.num_reg = 8;
829         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
830         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
831 }
832
/*
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 * @ring: the ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet asking the CP to overwrite it with 0xDEADBEEF, and polls for
 * up to adev->usec_timeout microseconds for the new value to appear.
 *
 * Returns 0 on success, a negative error code if the scratch register
 * or ring space could not be obtained, or -EINVAL on timeout.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch register with a sentinel the CP must replace */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to process the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
876
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 * @ring: the ring to test
 * @timeout: fence wait timeout in jiffies
 *
 * Allocates a writeback slot seeded with 0xCAFEDEAD, builds a small IB
 * containing a WRITE_DATA packet that stores 0xDEADBEEF to that slot,
 * schedules the IB, waits on its fence, and checks that the sentinel
 * was overwritten.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence did not signal within
 * @timeout, -EINVAL if the write never landed, or another negative
 * error code from allocation/scheduling.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	/* seed the writeback slot; the IB below must overwrite it */
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* WRITE_DATA: store 0xDEADBEEF to gpu_addr, with write confirm */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	/* NOTE(review): the slot was seeded with cpu_to_le32() but is read
	 * back raw; identical on little-endian hosts — confirm whether a
	 * le32_to_cpu() is wanted here for big-endian correctness. */
	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
939
940
941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
942 {
943         release_firmware(adev->gfx.pfp_fw);
944         adev->gfx.pfp_fw = NULL;
945         release_firmware(adev->gfx.me_fw);
946         adev->gfx.me_fw = NULL;
947         release_firmware(adev->gfx.ce_fw);
948         adev->gfx.ce_fw = NULL;
949         release_firmware(adev->gfx.rlc_fw);
950         adev->gfx.rlc_fw = NULL;
951         release_firmware(adev->gfx.mec_fw);
952         adev->gfx.mec_fw = NULL;
953         if ((adev->asic_type != CHIP_STONEY) &&
954             (adev->asic_type != CHIP_TOPAZ))
955                 release_firmware(adev->gfx.mec2_fw);
956         adev->gfx.mec2_fw = NULL;
957
958         kfree(adev->gfx.rlc.register_list_format);
959 }
960
961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
962 {
963         const char *chip_name;
964         char fw_name[30];
965         int err;
966         struct amdgpu_firmware_info *info = NULL;
967         const struct common_firmware_header *header = NULL;
968         const struct gfx_firmware_header_v1_0 *cp_hdr;
969         const struct rlc_firmware_header_v2_0 *rlc_hdr;
970         unsigned int *tmp = NULL, i;
971
972         DRM_DEBUG("\n");
973
974         switch (adev->asic_type) {
975         case CHIP_TOPAZ:
976                 chip_name = "topaz";
977                 break;
978         case CHIP_TONGA:
979                 chip_name = "tonga";
980                 break;
981         case CHIP_CARRIZO:
982                 chip_name = "carrizo";
983                 break;
984         case CHIP_FIJI:
985                 chip_name = "fiji";
986                 break;
987         case CHIP_STONEY:
988                 chip_name = "stoney";
989                 break;
990         case CHIP_POLARIS10:
991                 chip_name = "polaris10";
992                 break;
993         case CHIP_POLARIS11:
994                 chip_name = "polaris11";
995                 break;
996         case CHIP_POLARIS12:
997                 chip_name = "polaris12";
998                 break;
999         case CHIP_VEGAM:
1000                 chip_name = "vegam";
1001                 break;
1002         default:
1003                 BUG();
1004         }
1005
1006         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1009                 if (err == -ENOENT) {
1010                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1011                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1012                 }
1013         } else {
1014                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1015                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1016         }
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1023         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1027                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1028                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1029                 if (err == -ENOENT) {
1030                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1031                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1032                 }
1033         } else {
1034                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1035                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1036         }
1037         if (err)
1038                 goto out;
1039         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1040         if (err)
1041                 goto out;
1042         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1043         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1044
1045         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1046
1047         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1048                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1049                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050                 if (err == -ENOENT) {
1051                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1052                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1053                 }
1054         } else {
1055                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1056                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1057         }
1058         if (err)
1059                 goto out;
1060         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1061         if (err)
1062                 goto out;
1063         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1064         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1065         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1066
1067         /*
1068          * Support for MCBP/Virtualization in combination with chained IBs is
1069          * formal released on feature version #46
1070          */
1071         if (adev->gfx.ce_feature_version >= 46 &&
1072             adev->gfx.pfp_feature_version >= 46) {
1073                 adev->virt.chained_ib_support = true;
1074                 DRM_INFO("Chained IB support enabled!\n");
1075         } else
1076                 adev->virt.chained_ib_support = false;
1077
1078         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1079         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1080         if (err)
1081                 goto out;
1082         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1083         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1084         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1085         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1086
1087         adev->gfx.rlc.save_and_restore_offset =
1088                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1089         adev->gfx.rlc.clear_state_descriptor_offset =
1090                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1091         adev->gfx.rlc.avail_scratch_ram_locations =
1092                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1093         adev->gfx.rlc.reg_restore_list_size =
1094                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1095         adev->gfx.rlc.reg_list_format_start =
1096                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1097         adev->gfx.rlc.reg_list_format_separate_start =
1098                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1099         adev->gfx.rlc.starting_offsets_start =
1100                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1101         adev->gfx.rlc.reg_list_format_size_bytes =
1102                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1103         adev->gfx.rlc.reg_list_size_bytes =
1104                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1105
1106         adev->gfx.rlc.register_list_format =
1107                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1108                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1109
1110         if (!adev->gfx.rlc.register_list_format) {
1111                 err = -ENOMEM;
1112                 goto out;
1113         }
1114
1115         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1116                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1117         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1118                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1119
1120         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1121
1122         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1123                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1124         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1125                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1126
1127         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1128                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1129                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1130                 if (err == -ENOENT) {
1131                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1132                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1133                 }
1134         } else {
1135                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1136                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1137         }
1138         if (err)
1139                 goto out;
1140         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1141         if (err)
1142                 goto out;
1143         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1144         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1145         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1146
1147         if ((adev->asic_type != CHIP_STONEY) &&
1148             (adev->asic_type != CHIP_TOPAZ)) {
1149                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1150                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1151                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1152                         if (err == -ENOENT) {
1153                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1154                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1155                         }
1156                 } else {
1157                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1158                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1159                 }
1160                 if (!err) {
1161                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1162                         if (err)
1163                                 goto out;
1164                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1165                                 adev->gfx.mec2_fw->data;
1166                         adev->gfx.mec2_fw_version =
1167                                 le32_to_cpu(cp_hdr->header.ucode_version);
1168                         adev->gfx.mec2_feature_version =
1169                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1170                 } else {
1171                         err = 0;
1172                         adev->gfx.mec2_fw = NULL;
1173                 }
1174         }
1175
1176         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1177         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1178         info->fw = adev->gfx.pfp_fw;
1179         header = (const struct common_firmware_header *)info->fw->data;
1180         adev->firmware.fw_size +=
1181                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1182
1183         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1184         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1185         info->fw = adev->gfx.me_fw;
1186         header = (const struct common_firmware_header *)info->fw->data;
1187         adev->firmware.fw_size +=
1188                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1189
1190         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1191         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1192         info->fw = adev->gfx.ce_fw;
1193         header = (const struct common_firmware_header *)info->fw->data;
1194         adev->firmware.fw_size +=
1195                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1196
1197         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1198         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1199         info->fw = adev->gfx.rlc_fw;
1200         header = (const struct common_firmware_header *)info->fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1203
1204         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1205         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1206         info->fw = adev->gfx.mec_fw;
1207         header = (const struct common_firmware_header *)info->fw->data;
1208         adev->firmware.fw_size +=
1209                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1210
1211         /* we need account JT in */
1212         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1213         adev->firmware.fw_size +=
1214                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1215
1216         if (amdgpu_sriov_vf(adev)) {
1217                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1218                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1219                 info->fw = adev->gfx.mec_fw;
1220                 adev->firmware.fw_size +=
1221                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1222         }
1223
1224         if (adev->gfx.mec2_fw) {
1225                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1226                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1227                 info->fw = adev->gfx.mec2_fw;
1228                 header = (const struct common_firmware_header *)info->fw->data;
1229                 adev->firmware.fw_size +=
1230                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1231         }
1232
1233 out:
1234         if (err) {
1235                 dev_err(adev->dev,
1236                         "gfx8: Failed to load firmware \"%s\"\n",
1237                         fw_name);
1238                 release_firmware(adev->gfx.pfp_fw);
1239                 adev->gfx.pfp_fw = NULL;
1240                 release_firmware(adev->gfx.me_fw);
1241                 adev->gfx.me_fw = NULL;
1242                 release_firmware(adev->gfx.ce_fw);
1243                 adev->gfx.ce_fw = NULL;
1244                 release_firmware(adev->gfx.rlc_fw);
1245                 adev->gfx.rlc_fw = NULL;
1246                 release_firmware(adev->gfx.mec_fw);
1247                 adev->gfx.mec_fw = NULL;
1248                 release_firmware(adev->gfx.mec2_fw);
1249                 adev->gfx.mec2_fw = NULL;
1250         }
1251         return err;
1252 }
1253
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state PM4 command stream
 * @adev: amdgpu device
 * @buffer: destination for little-endian PM4 dwords; must be sized per
 *          gfx_v8_0_get_csb_size()
 *
 * Emits PREAMBLE begin/end markers around a CONTEXT_CONTROL packet, all
 * SECT_CONTEXT register extents from the RLC clear-state data, and the
 * raster configuration, followed by a final CLEAR_STATE packet.  Does
 * nothing if the clear-state data or the buffer is missing.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are expected;
				 * bail out on anything else */
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1300
1301 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1302 {
1303         const __le32 *fw_data;
1304         volatile u32 *dst_ptr;
1305         int me, i, max_me = 4;
1306         u32 bo_offset = 0;
1307         u32 table_offset, table_size;
1308
1309         if (adev->asic_type == CHIP_CARRIZO)
1310                 max_me = 5;
1311
1312         /* write the cp table buffer */
1313         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1314         for (me = 0; me < max_me; me++) {
1315                 if (me == 0) {
1316                         const struct gfx_firmware_header_v1_0 *hdr =
1317                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1318                         fw_data = (const __le32 *)
1319                                 (adev->gfx.ce_fw->data +
1320                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1321                         table_offset = le32_to_cpu(hdr->jt_offset);
1322                         table_size = le32_to_cpu(hdr->jt_size);
1323                 } else if (me == 1) {
1324                         const struct gfx_firmware_header_v1_0 *hdr =
1325                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1326                         fw_data = (const __le32 *)
1327                                 (adev->gfx.pfp_fw->data +
1328                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1329                         table_offset = le32_to_cpu(hdr->jt_offset);
1330                         table_size = le32_to_cpu(hdr->jt_size);
1331                 } else if (me == 2) {
1332                         const struct gfx_firmware_header_v1_0 *hdr =
1333                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1334                         fw_data = (const __le32 *)
1335                                 (adev->gfx.me_fw->data +
1336                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1337                         table_offset = le32_to_cpu(hdr->jt_offset);
1338                         table_size = le32_to_cpu(hdr->jt_size);
1339                 } else if (me == 3) {
1340                         const struct gfx_firmware_header_v1_0 *hdr =
1341                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1342                         fw_data = (const __le32 *)
1343                                 (adev->gfx.mec_fw->data +
1344                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1345                         table_offset = le32_to_cpu(hdr->jt_offset);
1346                         table_size = le32_to_cpu(hdr->jt_size);
1347                 } else  if (me == 4) {
1348                         const struct gfx_firmware_header_v1_0 *hdr =
1349                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1350                         fw_data = (const __le32 *)
1351                                 (adev->gfx.mec2_fw->data +
1352                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1353                         table_offset = le32_to_cpu(hdr->jt_offset);
1354                         table_size = le32_to_cpu(hdr->jt_size);
1355                 }
1356
1357                 for (i = 0; i < table_size; i ++) {
1358                         dst_ptr[bo_offset + i] =
1359                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1360                 }
1361
1362                 bo_offset += table_size;
1363         }
1364 }
1365
/* Release the RLC buffer objects: the clear-state bo and the CP
 * jump-table bo created by gfx_v8_0_rlc_init().
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1371
1372 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1373 {
1374         volatile u32 *dst_ptr;
1375         u32 dws;
1376         const struct cs_section_def *cs_data;
1377         int r;
1378
1379         adev->gfx.rlc.cs_data = vi_cs_data;
1380
1381         cs_data = adev->gfx.rlc.cs_data;
1382
1383         if (cs_data) {
1384                 /* clear state block */
1385                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1386
1387                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1388                                               AMDGPU_GEM_DOMAIN_VRAM,
1389                                               &adev->gfx.rlc.clear_state_obj,
1390                                               &adev->gfx.rlc.clear_state_gpu_addr,
1391                                               (void **)&adev->gfx.rlc.cs_ptr);
1392                 if (r) {
1393                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1394                         gfx_v8_0_rlc_fini(adev);
1395                         return r;
1396                 }
1397
1398                 /* set up the cs buffer */
1399                 dst_ptr = adev->gfx.rlc.cs_ptr;
1400                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1401                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1402                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1403         }
1404
1405         if ((adev->asic_type == CHIP_CARRIZO) ||
1406             (adev->asic_type == CHIP_STONEY)) {
1407                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1408                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1409                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1410                                               &adev->gfx.rlc.cp_table_obj,
1411                                               &adev->gfx.rlc.cp_table_gpu_addr,
1412                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1413                 if (r) {
1414                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1415                         return r;
1416                 }
1417
1418                 cz_init_cp_jump_table(adev);
1419
1420                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1421                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1422         }
1423
1424         return 0;
1425 }
1426
/* Release the MEC HPD EOP buffer object created by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1431
1432 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1433 {
1434         int r;
1435         u32 *hpd;
1436         size_t mec_hpd_size;
1437
1438         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1439
1440         /* take ownership of the relevant compute queues */
1441         amdgpu_gfx_compute_queue_acquire(adev);
1442
1443         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1444
1445         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1446                                       AMDGPU_GEM_DOMAIN_GTT,
1447                                       &adev->gfx.mec.hpd_eop_obj,
1448                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1449                                       (void **)&hpd);
1450         if (r) {
1451                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1452                 return r;
1453         }
1454
1455         memset(hpd, 0, mec_hpd_size);
1456
1457         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1458         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1459
1460         return 0;
1461 }
1462
/* Raw GFX8 shader machine code dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPRs.
 * Opaque binary — do not edit by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1499
/* Raw GFX8 shader machine code dispatched (twice, with different
 * static-thread-management masks) by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize SGPRs.  Opaque binary — do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1524
/* Flat (register, value) pairs programmed via PACKET3_SET_SH_REG before
 * dispatching vgpr_init_compute_shader; consumed two entries at a time
 * by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1545
/* Flat (register, value) pairs for the first SGPR-init dispatch
 * (SE0 thread-management mask 0x0f); consumed two entries at a time by
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1566
/* Flat (register, value) pairs for the second SGPR-init dispatch
 * (SE0 thread-management mask 0xf0, i.e. the other half of the CUs);
 * consumed two entries at a time by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1587
/* EDC SEC/DED error-counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1616
1617 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1618 {
1619         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1620         struct amdgpu_ib ib;
1621         struct dma_fence *f = NULL;
1622         int r, i;
1623         u32 tmp;
1624         unsigned total_size, vgpr_offset, sgpr_offset;
1625         u64 gpu_addr;
1626
1627         /* only supported on CZ */
1628         if (adev->asic_type != CHIP_CARRIZO)
1629                 return 0;
1630
1631         /* bail if the compute ring is not ready */
1632         if (!ring->ready)
1633                 return 0;
1634
1635         tmp = RREG32(mmGB_EDC_MODE);
1636         WREG32(mmGB_EDC_MODE, 0);
1637
1638         total_size =
1639                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1640         total_size +=
1641                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1642         total_size +=
1643                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1644         total_size = ALIGN(total_size, 256);
1645         vgpr_offset = total_size;
1646         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1647         sgpr_offset = total_size;
1648         total_size += sizeof(sgpr_init_compute_shader);
1649
1650         /* allocate an indirect buffer to put the commands in */
1651         memset(&ib, 0, sizeof(ib));
1652         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1653         if (r) {
1654                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1655                 return r;
1656         }
1657
1658         /* load the compute shaders */
1659         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1660                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1661
1662         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1663                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1664
1665         /* init the ib length to 0 */
1666         ib.length_dw = 0;
1667
1668         /* VGPR */
1669         /* write the register state for the compute dispatch */
1670         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1671                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1672                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1673                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1674         }
1675         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1676         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1677         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1678         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1679         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1680         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1681
1682         /* write dispatch packet */
1683         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1684         ib.ptr[ib.length_dw++] = 8; /* x */
1685         ib.ptr[ib.length_dw++] = 1; /* y */
1686         ib.ptr[ib.length_dw++] = 1; /* z */
1687         ib.ptr[ib.length_dw++] =
1688                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1689
1690         /* write CS partial flush packet */
1691         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1692         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1693
1694         /* SGPR1 */
1695         /* write the register state for the compute dispatch */
1696         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1697                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1698                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1699                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1700         }
1701         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1702         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1703         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1704         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1705         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1706         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1707
1708         /* write dispatch packet */
1709         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1710         ib.ptr[ib.length_dw++] = 8; /* x */
1711         ib.ptr[ib.length_dw++] = 1; /* y */
1712         ib.ptr[ib.length_dw++] = 1; /* z */
1713         ib.ptr[ib.length_dw++] =
1714                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1715
1716         /* write CS partial flush packet */
1717         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1718         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1719
1720         /* SGPR2 */
1721         /* write the register state for the compute dispatch */
1722         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1723                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1724                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1725                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1726         }
1727         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1728         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1729         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1730         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1731         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1732         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1733
1734         /* write dispatch packet */
1735         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1736         ib.ptr[ib.length_dw++] = 8; /* x */
1737         ib.ptr[ib.length_dw++] = 1; /* y */
1738         ib.ptr[ib.length_dw++] = 1; /* z */
1739         ib.ptr[ib.length_dw++] =
1740                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1741
1742         /* write CS partial flush packet */
1743         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1744         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1745
1746         /* shedule the ib on the ring */
1747         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1748         if (r) {
1749                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1750                 goto fail;
1751         }
1752
1753         /* wait for the GPU to finish processing the IB */
1754         r = dma_fence_wait(f, false);
1755         if (r) {
1756                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1757                 goto fail;
1758         }
1759
1760         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1761         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1762         WREG32(mmGB_EDC_MODE, tmp);
1763
1764         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1765         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1766         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1767
1768
1769         /* read back registers to clear the counters */
1770         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1771                 RREG32(sec_ded_counter_registers[i]);
1772
1773 fail:
1774         amdgpu_ib_free(adev, &ib, NULL);
1775         dma_fence_put(f);
1776
1777         return r;
1778 }
1779
/*
 * Early GFX configuration: fill adev->gfx.config with the per-ASIC
 * shader-engine/pipe/cache topology, derive the memory row size from the
 * MC registers, and compute the final GB_ADDR_CONFIG value.
 *
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info()
 * on POLARIS10/11/12/VEGAM (where the topology comes from the VBIOS).
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	/* per-ASIC fixed topology; POLARIS/VEGAM query it from atombios */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* topology comes from the VBIOS on these parts */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		/* topology comes from the VBIOS on these parts */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * confirm whether the register read itself is required.
	 */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* discrete parts: row size derived from MC_ARB_RAMCFG.NOOFCOLS */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1983
1984 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1985                                         int mec, int pipe, int queue)
1986 {
1987         int r;
1988         unsigned irq_type;
1989         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1990
1991         ring = &adev->gfx.compute_ring[ring_id];
1992
1993         /* mec0 is me1 */
1994         ring->me = mec + 1;
1995         ring->pipe = pipe;
1996         ring->queue = queue;
1997
1998         ring->ring_obj = NULL;
1999         ring->use_doorbell = true;
2000         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2001         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2002                                 + (ring_id * GFX8_MEC_HPD_SIZE);
2003         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2004
2005         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2006                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2007                 + ring->pipe;
2008
2009         /* type-2 packets are deprecated on MEC, use type-3 instead */
2010         r = amdgpu_ring_init(adev, ring, 1024,
2011                         &adev->gfx.eop_irq, irq_type);
2012         if (r)
2013                 return r;
2014
2015
2016         return 0;
2017 }
2018
2019 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2020
/**
 * gfx_v8_0_sw_init - software-side setup for the GFX8 IP block
 *
 * @handle: amdgpu_device pointer, passed as void * by the IP framework
 *
 * Registers the interrupt sources used by the GFX block, loads the gfx
 * microcode, allocates the RLC/MEC/KIQ buffer objects and MQDs, and
 * creates the gfx and compute rings.
 *
 * Returns 0 on success or a negative error code from a failed step.
 * NOTE(review): on failure, resources acquired by earlier steps are not
 * released here — presumably the IP framework invokes sw_fini; confirm.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC generation */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	/* fixed topology for all GFX8 parts: 4 pipes/MEC, 8 queues/pipe */
	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt handling is deferred to a workqueue item */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocates the MEC EOP buffer used by the compute rings below */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not assigned to the kernel driver */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2164
2165 static int gfx_v8_0_sw_fini(void *handle)
2166 {
2167         int i;
2168         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2169
2170         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2171         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2172         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2173
2174         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2175                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2176         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2177                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2178
2179         amdgpu_gfx_compute_mqd_sw_fini(adev);
2180         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2181         amdgpu_gfx_kiq_fini(adev);
2182
2183         gfx_v8_0_mec_fini(adev);
2184         gfx_v8_0_rlc_fini(adev);
2185         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2186                                 &adev->gfx.rlc.clear_state_gpu_addr,
2187                                 (void **)&adev->gfx.rlc.cs_ptr);
2188         if ((adev->asic_type == CHIP_CARRIZO) ||
2189             (adev->asic_type == CHIP_STONEY)) {
2190                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2191                                 &adev->gfx.rlc.cp_table_gpu_addr,
2192                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2193         }
2194         gfx_v8_0_free_microcode(adev);
2195
2196         return 0;
2197 }
2198
2199 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2200 {
2201         uint32_t *modearray, *mod2array;
2202         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2203         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2204         u32 reg_offset;
2205
2206         modearray = adev->gfx.config.tile_mode_array;
2207         mod2array = adev->gfx.config.macrotile_mode_array;
2208
2209         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2210                 modearray[reg_offset] = 0;
2211
2212         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2213                 mod2array[reg_offset] = 0;
2214
2215         switch (adev->asic_type) {
2216         case CHIP_TOPAZ:
2217                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2218                                 PIPE_CONFIG(ADDR_SURF_P2) |
2219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2221                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222                                 PIPE_CONFIG(ADDR_SURF_P2) |
2223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2225                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2226                                 PIPE_CONFIG(ADDR_SURF_P2) |
2227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2229                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                 PIPE_CONFIG(ADDR_SURF_P2) |
2231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2233                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234                                 PIPE_CONFIG(ADDR_SURF_P2) |
2235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2237                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238                                 PIPE_CONFIG(ADDR_SURF_P2) |
2239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2241                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2242                                 PIPE_CONFIG(ADDR_SURF_P2) |
2243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2246                                 PIPE_CONFIG(ADDR_SURF_P2));
2247                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2248                                 PIPE_CONFIG(ADDR_SURF_P2) |
2249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2250                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2251                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2252                                  PIPE_CONFIG(ADDR_SURF_P2) |
2253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2255                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2256                                  PIPE_CONFIG(ADDR_SURF_P2) |
2257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2259                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2260                                  PIPE_CONFIG(ADDR_SURF_P2) |
2261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2263                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                  PIPE_CONFIG(ADDR_SURF_P2) |
2265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2267                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2268                                  PIPE_CONFIG(ADDR_SURF_P2) |
2269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2272                                  PIPE_CONFIG(ADDR_SURF_P2) |
2273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2275                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2276                                  PIPE_CONFIG(ADDR_SURF_P2) |
2277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2279                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2280                                  PIPE_CONFIG(ADDR_SURF_P2) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2283                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2284                                  PIPE_CONFIG(ADDR_SURF_P2) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2287                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2288                                  PIPE_CONFIG(ADDR_SURF_P2) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2291                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2292                                  PIPE_CONFIG(ADDR_SURF_P2) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2295                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2296                                  PIPE_CONFIG(ADDR_SURF_P2) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2299                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2300                                  PIPE_CONFIG(ADDR_SURF_P2) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2303                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P2) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2308                                  PIPE_CONFIG(ADDR_SURF_P2) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                  PIPE_CONFIG(ADDR_SURF_P2) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P2) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319
2320                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2323                                 NUM_BANKS(ADDR_SURF_8_BANK));
2324                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2327                                 NUM_BANKS(ADDR_SURF_8_BANK));
2328                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2331                                 NUM_BANKS(ADDR_SURF_8_BANK));
2332                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2335                                 NUM_BANKS(ADDR_SURF_8_BANK));
2336                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2339                                 NUM_BANKS(ADDR_SURF_8_BANK));
2340                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2343                                 NUM_BANKS(ADDR_SURF_8_BANK));
2344                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2347                                 NUM_BANKS(ADDR_SURF_8_BANK));
2348                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2351                                 NUM_BANKS(ADDR_SURF_16_BANK));
2352                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2355                                 NUM_BANKS(ADDR_SURF_16_BANK));
2356                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2357                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2359                                  NUM_BANKS(ADDR_SURF_16_BANK));
2360                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2361                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2362                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363                                  NUM_BANKS(ADDR_SURF_16_BANK));
2364                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2367                                  NUM_BANKS(ADDR_SURF_16_BANK));
2368                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2371                                  NUM_BANKS(ADDR_SURF_16_BANK));
2372                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                  NUM_BANKS(ADDR_SURF_8_BANK));
2376
2377                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2378                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2379                             reg_offset != 23)
2380                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2381
2382                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2383                         if (reg_offset != 7)
2384                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2385
2386                 break;
2387         case CHIP_FIJI:
2388         case CHIP_VEGAM:
2389                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2426                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2435                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2455                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2459                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2460                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2463                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2499                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2507                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511
2512                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2515                                 NUM_BANKS(ADDR_SURF_8_BANK));
2516                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2519                                 NUM_BANKS(ADDR_SURF_8_BANK));
2520                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                 NUM_BANKS(ADDR_SURF_8_BANK));
2524                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2527                                 NUM_BANKS(ADDR_SURF_8_BANK));
2528                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2531                                 NUM_BANKS(ADDR_SURF_8_BANK));
2532                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535                                 NUM_BANKS(ADDR_SURF_8_BANK));
2536                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                 NUM_BANKS(ADDR_SURF_8_BANK));
2540                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2543                                 NUM_BANKS(ADDR_SURF_8_BANK));
2544                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2547                                 NUM_BANKS(ADDR_SURF_8_BANK));
2548                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2551                                  NUM_BANKS(ADDR_SURF_8_BANK));
2552                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2555                                  NUM_BANKS(ADDR_SURF_8_BANK));
2556                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2559                                  NUM_BANKS(ADDR_SURF_8_BANK));
2560                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2563                                  NUM_BANKS(ADDR_SURF_8_BANK));
2564                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2567                                  NUM_BANKS(ADDR_SURF_4_BANK));
2568
2569                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2570                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2571
2572                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2573                         if (reg_offset != 7)
2574                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2575
2576                 break;
2577         case CHIP_TONGA:
2578                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2582                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2585                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2586                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2590                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2593                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2594                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2597                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2598                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2601                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2602                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2603                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2605                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2606                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2607                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2608                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2609                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2612                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2621                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2624                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2629                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2634                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2635                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2637                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2639                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2644                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2645                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2648                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2649                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2651                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2652                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2653                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2655                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2656                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2657                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2658                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2659                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2660                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2661                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2662                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2663                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2664                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2665                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2667                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2668                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2669                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2671                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2672                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2673                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2674                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2675                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2676                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2677                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2678                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2679                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2680                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2681                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2682                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2683                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2684                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2686                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2687                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2690                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2691                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2693                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2694                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2695                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2696                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2699                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2700
2701                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704                                 NUM_BANKS(ADDR_SURF_16_BANK));
2705                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2707                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2708                                 NUM_BANKS(ADDR_SURF_16_BANK));
2709                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                 NUM_BANKS(ADDR_SURF_16_BANK));
2713                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2715                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716                                 NUM_BANKS(ADDR_SURF_16_BANK));
2717                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2719                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2720                                 NUM_BANKS(ADDR_SURF_16_BANK));
2721                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2724                                 NUM_BANKS(ADDR_SURF_16_BANK));
2725                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728                                 NUM_BANKS(ADDR_SURF_16_BANK));
2729                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2731                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2732                                 NUM_BANKS(ADDR_SURF_16_BANK));
2733                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2735                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736                                 NUM_BANKS(ADDR_SURF_16_BANK));
2737                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2739                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740                                  NUM_BANKS(ADDR_SURF_16_BANK));
2741                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2743                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2744                                  NUM_BANKS(ADDR_SURF_16_BANK));
2745                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748                                  NUM_BANKS(ADDR_SURF_8_BANK));
2749                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2751                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2752                                  NUM_BANKS(ADDR_SURF_4_BANK));
2753                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2756                                  NUM_BANKS(ADDR_SURF_4_BANK));
2757
2758                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2759                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2760
2761                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2762                         if (reg_offset != 7)
2763                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2764
2765                 break;
2766         case CHIP_POLARIS11:
2767         case CHIP_POLARIS12:
2768                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2772                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2776                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2780                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2788                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2800                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2802                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2814                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2818                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2834                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2838                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2839                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2842                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2843                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2846                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2847                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2850                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2851                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2854                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2859                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2862                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2863                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2866                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2867                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2870                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2871                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2874                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2875                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2877                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2878                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2881                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2882                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2885                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2886                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2890
2891                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2893                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894                                 NUM_BANKS(ADDR_SURF_16_BANK));
2895
2896                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2898                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2899                                 NUM_BANKS(ADDR_SURF_16_BANK));
2900
2901                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2903                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2904                                 NUM_BANKS(ADDR_SURF_16_BANK));
2905
2906                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2908                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909                                 NUM_BANKS(ADDR_SURF_16_BANK));
2910
2911                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2913                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2914                                 NUM_BANKS(ADDR_SURF_16_BANK));
2915
2916                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2918                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2919                                 NUM_BANKS(ADDR_SURF_16_BANK));
2920
2921                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2923                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2924                                 NUM_BANKS(ADDR_SURF_16_BANK));
2925
2926                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2927                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2928                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                 NUM_BANKS(ADDR_SURF_16_BANK));
2930
2931                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2934                                 NUM_BANKS(ADDR_SURF_16_BANK));
2935
2936                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2938                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2939                                 NUM_BANKS(ADDR_SURF_16_BANK));
2940
2941                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2943                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2944                                 NUM_BANKS(ADDR_SURF_16_BANK));
2945
2946                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949                                 NUM_BANKS(ADDR_SURF_16_BANK));
2950
2951                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2952                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2953                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2954                                 NUM_BANKS(ADDR_SURF_8_BANK));
2955
2956                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2959                                 NUM_BANKS(ADDR_SURF_4_BANK));
2960
2961                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2962                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2963
2964                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2965                         if (reg_offset != 7)
2966                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2967
2968                 break;
2969         case CHIP_POLARIS10:
2970                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2974                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2978                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2982                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2986                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2991                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2994                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2998                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2999                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3000                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3002                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3004                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3007                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3011                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3013                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3016                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3019                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3020                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3021                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3023                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3024                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3025                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3027                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3028                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3029                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3030                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3031                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3033                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3034                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3036                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3038                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3039                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3040                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3041                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3044                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3045                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3046                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3047                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3049                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3050                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3053                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3055                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3056                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3057                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3058                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3059                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3060                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3061                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3062                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3063                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3064                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3065                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3066                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3067                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3068                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3069                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3070                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3071                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3072                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3073                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3074                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3075                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3076                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3077                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3078                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3079                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3082                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3083                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3084                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3085                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3086                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3087                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3088                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3089                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3090                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3091                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3092
3093                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3095                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096                                 NUM_BANKS(ADDR_SURF_16_BANK));
3097
3098                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3100                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101                                 NUM_BANKS(ADDR_SURF_16_BANK));
3102
3103                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3104                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3105                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3106                                 NUM_BANKS(ADDR_SURF_16_BANK));
3107
3108                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3110                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3111                                 NUM_BANKS(ADDR_SURF_16_BANK));
3112
3113                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3115                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3116                                 NUM_BANKS(ADDR_SURF_16_BANK));
3117
3118                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122
3123                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3125                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3126                                 NUM_BANKS(ADDR_SURF_16_BANK));
3127
3128                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131                                 NUM_BANKS(ADDR_SURF_16_BANK));
3132
3133                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3135                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136                                 NUM_BANKS(ADDR_SURF_16_BANK));
3137
3138                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3140                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3141                                 NUM_BANKS(ADDR_SURF_16_BANK));
3142
3143                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3146                                 NUM_BANKS(ADDR_SURF_16_BANK));
3147
3148                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3151                                 NUM_BANKS(ADDR_SURF_8_BANK));
3152
3153                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3156                                 NUM_BANKS(ADDR_SURF_4_BANK));
3157
3158                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3160                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3161                                 NUM_BANKS(ADDR_SURF_4_BANK));
3162
3163                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3164                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3165
3166                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3167                         if (reg_offset != 7)
3168                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3169
3170                 break;
3171         case CHIP_STONEY:
3172                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173                                 PIPE_CONFIG(ADDR_SURF_P2) |
3174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3176                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3177                                 PIPE_CONFIG(ADDR_SURF_P2) |
3178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3180                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3181                                 PIPE_CONFIG(ADDR_SURF_P2) |
3182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3184                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185                                 PIPE_CONFIG(ADDR_SURF_P2) |
3186                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3188                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3189                                 PIPE_CONFIG(ADDR_SURF_P2) |
3190                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3192                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193                                 PIPE_CONFIG(ADDR_SURF_P2) |
3194                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3196                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3197                                 PIPE_CONFIG(ADDR_SURF_P2) |
3198                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3199                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3200                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3201                                 PIPE_CONFIG(ADDR_SURF_P2));
3202                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3203                                 PIPE_CONFIG(ADDR_SURF_P2) |
3204                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3205                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3206                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207                                  PIPE_CONFIG(ADDR_SURF_P2) |
3208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3211                                  PIPE_CONFIG(ADDR_SURF_P2) |
3212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3214                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3215                                  PIPE_CONFIG(ADDR_SURF_P2) |
3216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3218                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3219                                  PIPE_CONFIG(ADDR_SURF_P2) |
3220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3223                                  PIPE_CONFIG(ADDR_SURF_P2) |
3224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3226                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3227                                  PIPE_CONFIG(ADDR_SURF_P2) |
3228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3230                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3231                                  PIPE_CONFIG(ADDR_SURF_P2) |
3232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3234                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3235                                  PIPE_CONFIG(ADDR_SURF_P2) |
3236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3238                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3239                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3242                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3243                                  PIPE_CONFIG(ADDR_SURF_P2) |
3244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3246                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3247                                  PIPE_CONFIG(ADDR_SURF_P2) |
3248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3250                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3251                                  PIPE_CONFIG(ADDR_SURF_P2) |
3252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3254                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3255                                  PIPE_CONFIG(ADDR_SURF_P2) |
3256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3258                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3259                                  PIPE_CONFIG(ADDR_SURF_P2) |
3260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3262                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3263                                  PIPE_CONFIG(ADDR_SURF_P2) |
3264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                  PIPE_CONFIG(ADDR_SURF_P2) |
3268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3274
3275                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278                                 NUM_BANKS(ADDR_SURF_8_BANK));
3279                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282                                 NUM_BANKS(ADDR_SURF_8_BANK));
3283                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3286                                 NUM_BANKS(ADDR_SURF_8_BANK));
3287                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3290                                 NUM_BANKS(ADDR_SURF_8_BANK));
3291                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3293                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3294                                 NUM_BANKS(ADDR_SURF_8_BANK));
3295                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3296                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3297                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3298                                 NUM_BANKS(ADDR_SURF_8_BANK));
3299                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3300                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3301                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3302                                 NUM_BANKS(ADDR_SURF_8_BANK));
3303                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3304                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3305                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306                                 NUM_BANKS(ADDR_SURF_16_BANK));
3307                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3308                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3309                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3310                                 NUM_BANKS(ADDR_SURF_16_BANK));
3311                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3312                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3313                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                  NUM_BANKS(ADDR_SURF_16_BANK));
3315                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3316                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3317                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3318                                  NUM_BANKS(ADDR_SURF_16_BANK));
3319                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3320                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322                                  NUM_BANKS(ADDR_SURF_16_BANK));
3323                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326                                  NUM_BANKS(ADDR_SURF_16_BANK));
3327                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3329                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3330                                  NUM_BANKS(ADDR_SURF_8_BANK));
3331
3332                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3333                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3334                             reg_offset != 23)
3335                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3336
3337                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3338                         if (reg_offset != 7)
3339                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3340
3341                 break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through — unknown ASICs deliberately reuse the CHIP_CARRIZO tables */

3347         case CHIP_CARRIZO:
3348                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3349                                 PIPE_CONFIG(ADDR_SURF_P2) |
3350                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3352                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3353                                 PIPE_CONFIG(ADDR_SURF_P2) |
3354                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3356                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3357                                 PIPE_CONFIG(ADDR_SURF_P2) |
3358                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3360                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361                                 PIPE_CONFIG(ADDR_SURF_P2) |
3362                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3363                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3364                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3365                                 PIPE_CONFIG(ADDR_SURF_P2) |
3366                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3367                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3368                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369                                 PIPE_CONFIG(ADDR_SURF_P2) |
3370                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3371                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3372                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3373                                 PIPE_CONFIG(ADDR_SURF_P2) |
3374                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3375                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3376                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3377                                 PIPE_CONFIG(ADDR_SURF_P2));
3378                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3379                                 PIPE_CONFIG(ADDR_SURF_P2) |
3380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3381                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3382                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3383                                  PIPE_CONFIG(ADDR_SURF_P2) |
3384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3386                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3387                                  PIPE_CONFIG(ADDR_SURF_P2) |
3388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3390                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3391                                  PIPE_CONFIG(ADDR_SURF_P2) |
3392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3394                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3395                                  PIPE_CONFIG(ADDR_SURF_P2) |
3396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3398                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3399                                  PIPE_CONFIG(ADDR_SURF_P2) |
3400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3402                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3403                                  PIPE_CONFIG(ADDR_SURF_P2) |
3404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3406                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3407                                  PIPE_CONFIG(ADDR_SURF_P2) |
3408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3410                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3411                                  PIPE_CONFIG(ADDR_SURF_P2) |
3412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3414                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3415                                  PIPE_CONFIG(ADDR_SURF_P2) |
3416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3418                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3419                                  PIPE_CONFIG(ADDR_SURF_P2) |
3420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3421                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3422                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3423                                  PIPE_CONFIG(ADDR_SURF_P2) |
3424                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3425                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3426                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3427                                  PIPE_CONFIG(ADDR_SURF_P2) |
3428                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3429                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3430                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3431                                  PIPE_CONFIG(ADDR_SURF_P2) |
3432                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3433                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3434                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3435                                  PIPE_CONFIG(ADDR_SURF_P2) |
3436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3438                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3439                                  PIPE_CONFIG(ADDR_SURF_P2) |
3440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3442                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3443                                  PIPE_CONFIG(ADDR_SURF_P2) |
3444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3446                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3447                                  PIPE_CONFIG(ADDR_SURF_P2) |
3448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3450
3451                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3452                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3453                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3454                                 NUM_BANKS(ADDR_SURF_8_BANK));
3455                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3456                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3457                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3458                                 NUM_BANKS(ADDR_SURF_8_BANK));
3459                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3460                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3461                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3462                                 NUM_BANKS(ADDR_SURF_8_BANK));
3463                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3464                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3465                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3466                                 NUM_BANKS(ADDR_SURF_8_BANK));
3467                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3468                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3469                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3470                                 NUM_BANKS(ADDR_SURF_8_BANK));
3471                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3472                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3473                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3474                                 NUM_BANKS(ADDR_SURF_8_BANK));
3475                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3476                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3477                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3478                                 NUM_BANKS(ADDR_SURF_8_BANK));
3479                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3480                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3481                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3482                                 NUM_BANKS(ADDR_SURF_16_BANK));
3483                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3484                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3485                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3486                                 NUM_BANKS(ADDR_SURF_16_BANK));
3487                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3488                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3489                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3490                                  NUM_BANKS(ADDR_SURF_16_BANK));
3491                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3492                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3493                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3494                                  NUM_BANKS(ADDR_SURF_16_BANK));
3495                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3496                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3497                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3498                                  NUM_BANKS(ADDR_SURF_16_BANK));
3499                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3500                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3501                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3502                                  NUM_BANKS(ADDR_SURF_16_BANK));
3503                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3504                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3505                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3506                                  NUM_BANKS(ADDR_SURF_8_BANK));
3507
3508                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3509                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3510                             reg_offset != 23)
3511                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3512
3513                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3514                         if (reg_offset != 7)
3515                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3516
3517                 break;
3518         }
3519 }
3520
3521 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3522                                   u32 se_num, u32 sh_num, u32 instance)
3523 {
3524         u32 data;
3525
3526         if (instance == 0xffffffff)
3527                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3528         else
3529                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3530
3531         if (se_num == 0xffffffff)
3532                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3533         else
3534                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3535
3536         if (sh_num == 0xffffffff)
3537                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3538         else
3539                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3540
3541         WREG32(mmGRBM_GFX_INDEX, data);
3542 }
3543
/* Steer register access to the given ME/pipe/queue through the SRBM
 * (VMID 0). Thin wrapper so the gfx IP block matches the common
 * select_me_pipe_q callback shape. */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3549
3550 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3551 {
3552         u32 data, mask;
3553
3554         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3555                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3556
3557         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3558
3559         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3560                                          adev->gfx.config.max_sh_per_se);
3561
3562         return (~data) & mask;
3563 }
3564
3565 static void
3566 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3567 {
3568         switch (adev->asic_type) {
3569         case CHIP_FIJI:
3570         case CHIP_VEGAM:
3571                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3572                           RB_XSEL2(1) | PKR_MAP(2) |
3573                           PKR_XSEL(1) | PKR_YSEL(1) |
3574                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3575                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3576                            SE_PAIR_YSEL(2);
3577                 break;
3578         case CHIP_TONGA:
3579         case CHIP_POLARIS10:
3580                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3581                           SE_XSEL(1) | SE_YSEL(1);
3582                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3583                            SE_PAIR_YSEL(2);
3584                 break;
3585         case CHIP_TOPAZ:
3586         case CHIP_CARRIZO:
3587                 *rconf |= RB_MAP_PKR0(2);
3588                 *rconf1 |= 0x0;
3589                 break;
3590         case CHIP_POLARIS11:
3591         case CHIP_POLARIS12:
3592                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3593                           SE_XSEL(1) | SE_YSEL(1);
3594                 *rconf1 |= 0x0;
3595                 break;
3596         case CHIP_STONEY:
3597                 *rconf |= 0x0;
3598                 *rconf1 |= 0x0;
3599                 break;
3600         default:
3601                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3602                 break;
3603         }
3604 }
3605
/*
 * gfx_v8_0_write_harvested_raster_configs - per-SE raster config with
 * harvested RBs
 *
 * @adev: amdgpu_device pointer
 * @raster_config: stock PA_SC_RASTER_CONFIG value for the fully-enabled chip
 * @raster_config_1: stock PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmask of render backends that are actually active
 * @num_rb: number of RB "pipes" before harvesting
 *
 * For each shader engine, patch the SE/PKR/RB mapping fields of the raster
 * config so work is only routed to RBs that exist, then write the per-SE
 * value through GRBM_GFX_INDEX steering.  Broadcast steering is restored
 * before returning.  Caller is expected to hold grbm_idx_mutex (all callers
 * in this file do).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* se_mask[i] = the slice of rb_mask belonging to shader engine i. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* The mapping logic below only handles these topologies. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If an entire SE pair is harvested, retarget SE_PAIR_MAP at the
	 * surviving pair.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* first SE of this SE's pair */

		/* If one SE of the pair is fully harvested, point SE_MAP at
		 * the live one.
		 */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: retarget PKR_MAP when all of a
		 * packer's RBs are harvested.
		 */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally fix up the RB_MAP fields inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* Second packer's RB pair. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3714
/*
 * gfx_v8_0_setup_rb - detect active render backends and program raster config
 *
 * @adev: amdgpu_device pointer
 *
 * Builds a bitmap of active RBs by probing every SE/SH through
 * GRBM_GFX_INDEX, programs PA_SC_RASTER_CONFIG(_1) — taking the harvested
 * path when some RBs are disabled — and caches the per-SE/SH register
 * values for userspace queries.  Holds grbm_idx_mutex for the duration
 * because GRBM_GFX_INDEX steering is a global resource.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* Pack each SH's bitmap into its slot of active_rbs. */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* All RBs present (or none detected): write the stock config;
	 * otherwise patch the mapping around the harvested RBs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3771
3772 /**
3773  * gfx_v8_0_init_compute_vmid - gart enable
3774  *
3775  * @adev: amdgpu_device pointer
3776  *
3777  * Initialize compute vmid sh_mem registers
3778  *
3779  */
3780 #define DEFAULT_SH_MEM_BASES    (0x6000)
3781 #define FIRST_COMPUTE_VMID      (8)
3782 #define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, MTYPE_CC as the
	 * default MTYPE, private memory routed through the ATC. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Program every compute VMID through SRBM-indexed access. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit — presumably leaves the APE1 aperture
		 * disabled (same values used in gfx_v8_0_constants_init);
		 * TODO confirm against register spec. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore SRBM steering to VMID 0. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3816
3817 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3818 {
3819         switch (adev->asic_type) {
3820         default:
3821                 adev->gfx.config.double_offchip_lds_buf = 1;
3822                 break;
3823         case CHIP_CARRIZO:
3824         case CHIP_STONEY:
3825                 adev->gfx.config.double_offchip_lds_buf = 0;
3826                 break;
3827         }
3828 }
3829
/*
 * gfx_v8_0_constants_init - program static GFX state
 *
 * @adev: amdgpu_device pointer
 *
 * One-shot hardware setup: address config mirrors, tiling tables, RB/CU
 * detection, per-VMID SH_MEM apertures and a few global FIFO/arbitration
 * registers.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* Mirror the address config into the HDP and DMIF copies. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM registers for every VMID via SRBM steering. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default MTYPE, bases at 0. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: MTYPE_NC default, base taken from the
			 * GMC shared aperture (top 16 bits). */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit — presumably disables the APE1 aperture
		 * (same values as gfx_v8_0_init_compute_vmid); TODO confirm. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	/* Restore SRBM steering to VMID 0. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Set all four PIPE_ORDER_TS* arbitration fields to the same value. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3911
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH (via GRBM_GFX_INDEX
 * steering), then the non-CU master busy bits, allowing up to
 * adev->usec_timeout iterations (1us apart) per poll target.  A per-SE/SH
 * timeout logs via DRM_INFO and returns early; the final non-CU poll times
 * out silently.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast steering and drop the
				 * lock before bailing out. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU serdes masters as well. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3949
3950 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3951                                                bool enable)
3952 {
3953         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3954
3955         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3956         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3957         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3958         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3959
3960         WREG32(mmCP_INT_CNTL_RING0, tmp);
3961 }
3962
/* Point the RLC at the clear-state indirect buffer (CSIB): split the GPU
 * address into HI/LO registers (LO masked to dword alignment) and program
 * the buffer length. */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3973
3974 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3975                                 int ind_offset,
3976                                 int list_size,
3977                                 int *unique_indices,
3978                                 int *indices_count,
3979                                 int max_indices,
3980                                 int *ind_start_offsets,
3981                                 int *offset_count,
3982                                 int max_offset)
3983 {
3984         int indices;
3985         bool new_entry = true;
3986
3987         for (; ind_offset < list_size; ind_offset++) {
3988
3989                 if (new_entry) {
3990                         new_entry = false;
3991                         ind_start_offsets[*offset_count] = ind_offset;
3992                         *offset_count = *offset_count + 1;
3993                         BUG_ON(*offset_count >= max_offset);
3994                 }
3995
3996                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3997                         new_entry = true;
3998                         continue;
3999                 }
4000
4001                 ind_offset += 2;
4002
4003                 /* look for the matching indice */
4004                 for (indices = 0;
4005                         indices < *indices_count;
4006                         indices++) {
4007                         if (unique_indices[indices] ==
4008                                 register_list_format[ind_offset])
4009                                 break;
4010                 }
4011
4012                 if (indices >= *indices_count) {
4013                         unique_indices[*indices_count] =
4014                                 register_list_format[ind_offset];
4015                         indices = *indices_count;
4016                         *indices_count = *indices_count + 1;
4017                         BUG_ON(*indices_count >= max_indices);
4018                 }
4019
4020                 register_list_format[ind_offset] = indices;
4021         }
4022 }
4023
/*
 * Upload the RLC save/restore machine (SRM) programming derived from the
 * ucode's register list blobs: the direct restore list goes into SRM ARAM,
 * the (index-rewritten) format list and start offsets go into GPM scratch,
 * and each unique index is programmed into an SRM_INDEX_CNTL_ADDR/DATA
 * register pair.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: parsing rewrites the entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* ARAM address auto-increments after each data write */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore list size is stored in register pairs (hence the >> 1) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits carry the address, the remainder above
			 * bit 20 the data portion — packed entry format is
			 * defined by the RLC ucode (not reconstructed here)
			 */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4087
/* Turn on the RLC save/restore machine (SRM) programmed by
 * gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4092
4093 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4094 {
4095         uint32_t data;
4096
4097         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4098
4099         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4100         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4101         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4102         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4103         WREG32(mmRLC_PG_DELAY, data);
4104
4105         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4106         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4107
4108 }
4109
/* Toggle SMU clock slow-down during power-up ("cz_" prefix: helper used
 * for the Carrizo family — confirm against callers).
 */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4115
/* Toggle SMU clock slow-down during power-down (counterpart of the
 * power-up variant above; "cz_" prefix: Carrizo-family helper).
 */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4121
/* Enable/disable CP power gating.  Note the inversion: the hardware
 * field is CP_PG_DISABLE, so enabling power gating writes 0 and
 * disabling it writes 1.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4126
4127 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4128 {
4129         if ((adev->asic_type == CHIP_CARRIZO) ||
4130             (adev->asic_type == CHIP_STONEY)) {
4131                 gfx_v8_0_init_csb(adev);
4132                 gfx_v8_0_init_save_restore_list(adev);
4133                 gfx_v8_0_enable_save_restore_machine(adev);
4134                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4135                 gfx_v8_0_init_power_gating(adev);
4136                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4137         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4138                    (adev->asic_type == CHIP_POLARIS12) ||
4139                    (adev->asic_type == CHIP_VEGAM)) {
4140                 gfx_v8_0_init_csb(adev);
4141                 gfx_v8_0_init_save_restore_list(adev);
4142                 gfx_v8_0_enable_save_restore_machine(adev);
4143                 gfx_v8_0_init_power_gating(adev);
4144         }
4145
4146 }
4147
/* Halt the RLC F32 core, mask the GUI-idle interrupts and wait for the
 * serdes masters to drain before the caller touches RLC state.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4155
/* Pulse the RLC soft-reset bit in GRBM, allowing 50us of settle time on
 * each edge.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4164
/* Re-enable the RLC F32 core and, on dGPUs only, unmask the GUI-idle
 * interrupts right away.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs (e.g. Carrizo) the CP interrupt is enabled later,
	 * only after the CP itself has been initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4175
/* Full RLC restart sequence: stop, soft-reset, reprogram power gating,
 * then start again.  Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);
	gfx_v8_0_rlc_start(adev);

	return 0;
}
4185
4186 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4187 {
4188         int i;
4189         u32 tmp = RREG32(mmCP_ME_CNTL);
4190
4191         if (enable) {
4192                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4193                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4194                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4195         } else {
4196                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4197                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4198                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4199                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4200                         adev->gfx.gfx_ring[i].ready = false;
4201         }
4202         WREG32(mmCP_ME_CNTL, tmp);
4203         udelay(50);
4204 }
4205
4206 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4207 {
4208         u32 count = 0;
4209         const struct cs_section_def *sect = NULL;
4210         const struct cs_extent_def *ext = NULL;
4211
4212         /* begin clear state */
4213         count += 2;
4214         /* context control state */
4215         count += 3;
4216
4217         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4218                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4219                         if (sect->id == SECT_CONTEXT)
4220                                 count += 2 + ext->reg_count;
4221                         else
4222                                 return 0;
4223                 }
4224         }
4225         /* pa_sc_raster_config/pa_sc_raster_config1 */
4226         count += 4;
4227         /* end clear state */
4228         count += 2;
4229         /* clear state */
4230         count += 2;
4231
4232         return count;
4233 }
4234
/*
 * Release the gfx CP and feed it its initial PM4 packet stream on gfx
 * ring 0: the clear-state preamble, the context register defaults from
 * vi_cs_data, the raster config, and the CE partition bases.  The packet
 * count must match gfx_v8_0_get_csb_size() (+4 for the SET_BASE packet).
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the trailing SET_BASE packet for the CE partitions */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent of the golden clear state */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config for SE0/SH0 applies chip-wide here */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4298 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4299 {
4300         u32 tmp;
4301         /* no gfx doorbells on iceland */
4302         if (adev->asic_type == CHIP_TOPAZ)
4303                 return;
4304
4305         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4306
4307         if (ring->use_doorbell) {
4308                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4309                                 DOORBELL_OFFSET, ring->doorbell_index);
4310                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4311                                                 DOORBELL_HIT, 0);
4312                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4313                                             DOORBELL_EN, 1);
4314         } else {
4315                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4316         }
4317
4318         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4319
4320         if (adev->flags & AMD_IS_APU)
4321                 return;
4322
4323         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4324                                         DOORBELL_RANGE_LOWER,
4325                                         AMDGPU_DOORBELL_GFX_RING0);
4326         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4327
4328         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4329                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4330 }
4331
/*
 * Bring up gfx ring 0: program CP_RB0 size/pointers/write-back addresses,
 * set up the doorbell, then start the ring and run a ring test.  The
 * register sequence is order-sensitive (RPTR_WR_ENA is held while the
 * pointers are reset, then CNTL is restored).
 *
 * Returns the result of amdgpu_ring_test_ring(); on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (register encodes log2 of size in qwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers
	 * (RPTR_WR_ENA allows the wptr/rptr reset to take effect) */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RPTR_WR_ENA now that pointers are reset */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is 256-byte aligned, hence the >> 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4389
4390 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4391 {
4392         int i;
4393
4394         if (enable) {
4395                 WREG32(mmCP_MEC_CNTL, 0);
4396         } else {
4397                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4398                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4399                         adev->gfx.compute_ring[i].ready = false;
4400                 adev->gfx.kiq.ring.ready = false;
4401         }
4402         udelay(50);
4403 }
4404
4405 /* KIQ functions */
/* Register ring's me/pipe/queue as the KIQ with the RLC scheduler. */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* deliberate two-step write: the queue id is latched first, then
	 * bit 7 is set in a second write — presumably a valid/enable
	 * strobe; NOTE(review): confirm against RLC documentation */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4419
/*
 * Enable all kernel compute queues (KCQs) through the KIQ: first a
 * SET_RESOURCES packet publishing the queue mask, then one MAP_QUEUES
 * packet per compute ring, followed by a KIQ ring test to confirm the
 * packets were consumed.
 *
 * Returns 0 on success or a negative errno; on ring-test failure the
 * KIQ ring is marked not ready.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of all usable MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords per MAP_QUEUES + 8 for the SET_RESOURCES packet */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("KCQ enable failed\n");
		kiq_ring->ready = false;
	}
	return r;
}
4483
/*
 * Ask the HQD currently selected via SRBM to dequeue, and wait for it to
 * go inactive.  @req is the DEQUEUE_REQ value (drain/reset type).
 *
 * Returns 0 on success or -ETIMEDOUT if the queue stayed active; the
 * dequeue request and the PQ read/write pointers are cleared either way.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* always clean up, even after a timeout */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4504
/*
 * Fill in the memory queue descriptor (MQD) for ring.  The MQD is the
 * in-memory image of the HQD registers that gfx_v8_0_mqd_commit() later
 * writes to hardware; everything from EOP buffer, doorbell and ring base
 * to the default register snapshots is prepared here.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* fixed header/compute defaults; thread mgmt masks enable all SEs */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* the dynamic CU mask lives inside the same MQD allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP base address is 256-byte aligned (>> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4647
/*
 * Write a prepared MQD image into the HQD registers of the queue that is
 * currently selected via SRBM.  Registers are programmed in three ranges,
 * with the EOP pointer range skipped on Tonga (errata), and the ACTIVE
 * register written last so the queue only starts once fully configured.
 * Always returns 0.  Non-static: also used by the amdkfd interface.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD (this range ends with mmCP_HQD_ACTIVE) */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4684
/* Initialize the KIQ (kernel interface queue) MQD and commit it to its HQD.
 * On GPU reset the MQD is restored from the CPU-side backup instead of
 * being rebuilt; on first init a fresh MQD is generated and backed up.
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ backup occupies the slot after the compute-ring backups */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		/* SRBM select routes register access to this me/pipe/queue;
		 * the mutex serializes against other SRBM users */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		/* fresh MQD: zero it and enable all CUs/RBs by default */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU copy so a later GPU reset can restore this MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4723
/* Initialize a kernel compute queue (KCQ) MQD.
 * Three paths: first init builds a fresh MQD (and backs it up),
 * GPU reset restores the backup and clears the ring, and plain
 * resume-from-suspend only clears the ring (the backed-up MQD is
 * presumed still valid and is re-mapped via the KIQ — see
 * gfx_v8_0_kcq_resume()). Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index is the ring's position in compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		/* fresh MQD: zero it and enable all CUs/RBs by default */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: only the ring contents need clearing */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4754
/* Program the MEC doorbell aperture (KIQ..MEC_RING7) and globally enable
 * doorbells. NOTE(review): the range registers are only written for ASICs
 * newer than Tonga (asic_type > CHIP_TONGA); presumably older parts rely
 * on defaults — confirm against the per-ASIC programming docs.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4764
4765 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4766 {
4767         struct amdgpu_ring *ring;
4768         int r;
4769
4770         ring = &adev->gfx.kiq.ring;
4771
4772         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4773         if (unlikely(r != 0))
4774                 return r;
4775
4776         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4777         if (unlikely(r != 0))
4778                 return r;
4779
4780         gfx_v8_0_kiq_init_queue(ring);
4781         amdgpu_bo_kunmap(ring->mqd_obj);
4782         ring->mqd_ptr = NULL;
4783         amdgpu_bo_unreserve(ring->mqd_obj);
4784         ring->ready = true;
4785         return 0;
4786 }
4787
/* Restore all kernel compute queues: enable the compute CP, initialize
 * each ring's MQD, program the MEC doorbell aperture, map the queues
 * through the KIQ, then ring-test each queue. Returns 0 on success or
 * the first error encountered.
 */
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		/* MQD is initialized through a temporary CPU mapping of its BO */
		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* ask the KIQ to map all KCQs onto hardware queue slots */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KCQs - reversing the order of rings seems to fix ring test failure
	 * after GPU reset
	 */
	for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
		ring = &adev->gfx.compute_ring[i];
		/* a failing ring test marks the ring unusable, but is not fatal */
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4832
4833 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4834 {
4835         int r;
4836
4837         if (!(adev->flags & AMD_IS_APU))
4838                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4839
4840         r = gfx_v8_0_kiq_resume(adev);
4841         if (r)
4842                 return r;
4843
4844         r = gfx_v8_0_cp_gfx_resume(adev);
4845         if (r)
4846                 return r;
4847
4848         r = gfx_v8_0_kcq_resume(adev);
4849         if (r)
4850                 return r;
4851         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4852
4853         return 0;
4854 }
4855
/* Enable or halt both command-processor front ends (GFX then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4861
/* hw_init IP callback: program golden/constant registers, then bring up
 * the RLC and the command processors. Returns 0 on success.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	/* RLC must be running before the CP queues come up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4878
4879 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4880 {
4881         int r, i;
4882         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4883
4884         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4885         if (r)
4886                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4887
4888         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4889                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4890
4891                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4892                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4893                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4894                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4895                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4896                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4897                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4898                 amdgpu_ring_write(kiq_ring, 0);
4899                 amdgpu_ring_write(kiq_ring, 0);
4900                 amdgpu_ring_write(kiq_ring, 0);
4901         }
4902         r = amdgpu_ring_test_ring(kiq_ring);
4903         if (r)
4904                 DRM_ERROR("KCQ disable failed\n");
4905
4906         return r;
4907 }
4908
4909 static bool gfx_v8_0_is_idle(void *handle)
4910 {
4911         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4912
4913         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4914                 || RREG32(mmGRBM_STATUS2) != 0x8)
4915                 return false;
4916         else
4917                 return true;
4918 }
4919
4920 static bool gfx_v8_0_rlc_is_idle(void *handle)
4921 {
4922         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4923
4924         if (RREG32(mmGRBM_STATUS2) != 0x8)
4925                 return false;
4926         else
4927                 return true;
4928 }
4929
4930 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4931 {
4932         unsigned int i;
4933         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4934
4935         for (i = 0; i < adev->usec_timeout; i++) {
4936                 if (gfx_v8_0_rlc_is_idle(handle))
4937                         return 0;
4938
4939                 udelay(1);
4940         }
4941         return -ETIMEDOUT;
4942 }
4943
4944 static int gfx_v8_0_wait_for_idle(void *handle)
4945 {
4946         unsigned int i;
4947         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4948
4949         for (i = 0; i < adev->usec_timeout; i++) {
4950                 if (gfx_v8_0_is_idle(handle))
4951                         return 0;
4952
4953                 udelay(1);
4954         }
4955         return -ETIMEDOUT;
4956 }
4957
/* hw_fini IP callback: release GFX interrupts, unmap the compute queues,
 * then (bare-metal only) halt the CP and RLC once they are idle.
 * Under SRIOV the host owns the engines, so nothing is halted here.
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* only halt CP/RLC when idle; halting a busy engine is skipped
	 * (with a complaint) rather than risking a hang */
	adev->gfx.rlc.funcs->enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		gfx_v8_0_rlc_stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
4988
/* suspend IP callback: identical to a full hw_fini. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4993
/* resume IP callback: identical to a full hw_init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4998
/* Inspect GRBM/SRBM status registers and build the GRBM/SRBM soft-reset
 * masks needed to recover any busy engines. The masks are cached in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset callbacks.
 * Returns true if any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy graphics-pipeline unit means CP+GFX (and GRBM) need reset */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro-engine (fetcher/compute/graphics) */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* publish (or clear) the masks for the reset callbacks */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5060
/* Quiesce the hardware before a soft reset: stop the RLC, and depending
 * on the masks computed by gfx_v8_0_check_soft_reset(), halt GFX and/or
 * compute CP front ends and deactivate every compute HQD.
 * No-op (returns 0) when no reset is pending.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute HQD under its SRBM selection */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5102
/* Perform the actual soft reset: stall/clear GFX via GMCON_DEBUG, assert
 * the pending GRBM and SRBM reset bits, hold them for 50us, deassert,
 * then release the GMCON stall. No-op (returns 0) when no reset is
 * pending. Each reset register is read back after writing — presumably
 * to post the write before the delay; confirm against the ASIC docs.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear the GFX path while resets are applied */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the reset bits, wait, then deassert */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/deassert dance for the SRBM-side resets */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5164
/* Bring the hardware back after a soft reset: re-deactivate the compute
 * HQDs and resume KIQ/KCQ if any CP engine was reset, resume the GFX
 * ring if CP/GFX was reset, then restart the RLC.
 * No-op (returns 0) when no reset was pending.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every HQD is inactive before re-initializing */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_rlc_start(adev);

	return 0;
}
5204
5205 /**
5206  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5207  *
5208  * @adev: amdgpu_device pointer
5209  *
5210  * Fetches a GPU clock counter snapshot.
5211  * Returns the 64 bit clock counter snapshot.
5212  */
5213 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5214 {
5215         uint64_t clock;
5216
5217         mutex_lock(&adev->gfx.gpu_clock_mutex);
5218         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5219         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5220                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5221         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5222         return clock;
5223 }
5224
/* Emit WRITE_DATA packets that program the per-VMID GDS base/size, GWS
 * allocation and OA mask registers (addresses from amdgpu_gds_reg_offset)
 * as part of a GDS switch on this ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: base and size packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bit mask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5263
/* Read one dword from an SQ indirect wave register (simd/wave/address)
 * via the SQ_IND_INDEX/SQ_IND_DATA register pair.
 * NOTE(review): no internal locking on the shared index/data pair —
 * presumably callers serialize access; confirm at the call sites.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5273
5274 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5275                            uint32_t wave, uint32_t thread,
5276                            uint32_t regno, uint32_t num, uint32_t *out)
5277 {
5278         WREG32(mmSQ_IND_INDEX,
5279                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5280                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5281                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5282                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5283                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5284                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5285         while (num--)
5286                 *(out++) = RREG32(mmSQ_IND_DATA);
5287 }
5288
/* Dump the fixed set of per-wave status registers for debugfs/debugger
 * use. dst[0] is 0 to tag this as type-0 wave data; *no_fields is
 * incremented once per dword written so the caller knows the count.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5312
5313 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5314                                      uint32_t wave, uint32_t start,
5315                                      uint32_t size, uint32_t *dst)
5316 {
5317         wave_read_regs(
5318                 adev, simd, wave, 0,
5319                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5320 }
5321
5322
/* GFX IP helper callbacks exposed to the amdgpu core (clock query,
 * SE/SH selection, wave debug reads). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5330
/* early_init IP callback: set ring counts and install the function
 * tables (gfx/ring/irq/gds/rlc) before any hardware access.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5345
/* late_init IP callback: enable the GFX interrupt sources and run the
 * EDC GPR workaround (which needs the IB pool, hence late init).
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5380
5381 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5382                                                        bool enable)
5383 {
5384         if (((adev->asic_type == CHIP_POLARIS11) ||
5385             (adev->asic_type == CHIP_POLARIS12) ||
5386             (adev->asic_type == CHIP_VEGAM)) &&
5387             adev->powerplay.pp_funcs->set_powergating_by_smu)
5388                 /* Send msg to SMU via Powerplay */
5389                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5390
5391         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5392 }
5393
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5399
/* Toggle quick power gating (Polaris11-family feature) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5405
/* Toggle GFX coarse-grain power gating (Carrizo/Stoney) via RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5411
/* Toggle GFX pipeline power gating (Carrizo/Stoney) via RLC_PG_CNTL. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5421
5422 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5423                                           bool enable)
5424 {
5425         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5426                 cz_enable_gfx_cg_power_gating(adev, true);
5427                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5428                         cz_enable_gfx_pipeline_power_gating(adev, true);
5429         } else {
5430                 cz_enable_gfx_cg_power_gating(adev, false);
5431                 cz_enable_gfx_pipeline_power_gating(adev, false);
5432         }
5433 }
5434
/* gfx_v8_0_set_powergating_state - IP block powergating entry point.
 * @handle: amdgpu_device pointer passed as the opaque IP handle
 * @state: AMD_PG_STATE_GATE to enable powergating, else ungate
 *
 * Programs the per-ASIC GFX powergating features selected in
 * adev->pg_flags.  The whole sequence is bracketed by RLC safe mode
 * whenever any of the RLC-sensitive features is supported.
 *
 * Returns 0 (this hook has no failure path).
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	/* Under SR-IOV the host controls powergating; nothing to do. */
	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down tracks RLC/SMU handshake support, not @enable. */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		/* CP powergating likewise follows only the support flag. */
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* Static then dynamic medium-grain gating, gated on @enable. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		/* Quick medium-grain gating is a Polaris11-family extra. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
5505
/* gfx_v8_0_get_clockgating_state - report currently active GFX CG features.
 * @handle: amdgpu_device pointer passed as the opaque IP handle
 * @flags: OR-accumulator of AMD_CG_SUPPORT_GFX_* bits for active features
 *
 * Decodes the live register state into AMD_CG_SUPPORT_* flags.  Note that
 * override bits are active-low ("override set" means the feature is forced
 * off), hence the negated tests below.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SR-IOV *flags is only zeroed here; the register
	 * reads below still execute and may OR bits back in — confirm this is
	 * the intended reporting for virtual functions.
	 */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (RLC light sleep implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (CP light sleep implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5547
/* gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC SERDES.
 * @adev: amdgpu device
 * @reg_addr: BPM register address (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: SET_BPM_SERDES_CMD or CLE_BPM_SERDES_CMD
 *
 * Selects all SEs/SHs/CUs, opens the CU and non-CU master masks, then
 * writes the command into RLC_SERDES_WR_CTRL.  Stoney lacks the BPM_DATA
 * and REG_ADDR fields in the clear mask, hence the per-ASIC branch.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* Broadcast to every SE/SH/CU. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* Encode command, target BPM register and broadcast BPM address. */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5588
/* RLC safe-mode handshake messages and RLC_GPR_REG2 bitfield layout.
 * NOTE(review): the RLC_GPR_REG2 defines do not appear to be referenced in
 * the surrounding code — confirm whether they are leftovers that can go.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5595
/* iceland_enter_rlc_safe_mode - request RLC safe mode and wait for entry.
 * @adev: amdgpu device
 *
 * No-op when the RLC F32 core is not running, or when neither CGCG nor
 * MGCG is enabled (safe mode is only needed while those are active).
 * Sends the ENTER message via RLC_SAFE_MODE, then polls until the GFX
 * clock/power status reports on and the CMD bit self-clears.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1 with MESSAGE=1 requests safe-mode entry. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* Wait for GFX clocks and power to be reported on. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* The RLC clears CMD once the request has been consumed. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5629
/* iceland_exit_rlc_safe_mode - release RLC safe mode.
 * @adev: amdgpu device
 *
 * Mirror of iceland_enter_rlc_safe_mode(): sends CMD with MESSAGE=0 (exit)
 * when safe mode was previously entered, then waits for the CMD bit to
 * self-clear.  Skipped entirely if the RLC F32 core is not running.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1 with MESSAGE=0 requests safe-mode exit. */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5654
/* RLC safe-mode callbacks shared by all GFXv8 ASICs in this file. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5659
/* gfx_v8_0_update_medium_grain_clock_gating - program MGCG/MGLS/CGTS.
 * @adev: amdgpu device
 * @enable: true to engage medium-grain gating, false to force it off
 *
 * Programs RLC/CP memory light sleep, the MGCG override register and the
 * CGTS (tree shade) controls, with SERDES BPM commands broadcast between
 * steps.  The numbered comments reflect the required hardware ordering;
 * the whole sequence runs under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it as well. */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is lifted only when both MGLS and CGTS_LS are on. */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5763
/* gfx_v8_0_update_coarse_grain_clock_gating - program CGCG/CGLS.
 * @adev: amdgpu device
 * @enable: true to engage coarse-grain gating, false to force it off
 *
 * Manipulates the CGCG/CGLS enables in RLC_CGCG_CGLS_CTRL together with
 * the matching MGCG override bits and SERDES BPM commands.  GUI idle
 * interrupts are disabled while gating is being torn down and re-enabled
 * afterwards (they are needed for PG).  Runs under RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - lift the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5856 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5857                                             bool enable)
5858 {
5859         if (enable) {
5860                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5861                  * ===  MGCG + MGLS + TS(CG/LS) ===
5862                  */
5863                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5864                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5865         } else {
5866                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5867                  * ===  CGCG + CGLS ===
5868                  */
5869                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5870                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5871         }
5872         return 0;
5873 }
5874
5875 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5876                                           enum amd_clockgating_state state)
5877 {
5878         uint32_t msg_id, pp_state = 0;
5879         uint32_t pp_support_state = 0;
5880
5881         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5882                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5883                         pp_support_state = PP_STATE_SUPPORT_LS;
5884                         pp_state = PP_STATE_LS;
5885                 }
5886                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5887                         pp_support_state |= PP_STATE_SUPPORT_CG;
5888                         pp_state |= PP_STATE_CG;
5889                 }
5890                 if (state == AMD_CG_STATE_UNGATE)
5891                         pp_state = 0;
5892
5893                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5894                                 PP_BLOCK_GFX_CG,
5895                                 pp_support_state,
5896                                 pp_state);
5897                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5898                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5899         }
5900
5901         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5902                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5903                         pp_support_state = PP_STATE_SUPPORT_LS;
5904                         pp_state = PP_STATE_LS;
5905                 }
5906
5907                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5908                         pp_support_state |= PP_STATE_SUPPORT_CG;
5909                         pp_state |= PP_STATE_CG;
5910                 }
5911
5912                 if (state == AMD_CG_STATE_UNGATE)
5913                         pp_state = 0;
5914
5915                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5916                                 PP_BLOCK_GFX_MG,
5917                                 pp_support_state,
5918                                 pp_state);
5919                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5920                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5921         }
5922
5923         return 0;
5924 }
5925
5926 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5927                                           enum amd_clockgating_state state)
5928 {
5929
5930         uint32_t msg_id, pp_state = 0;
5931         uint32_t pp_support_state = 0;
5932
5933         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5934                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5935                         pp_support_state = PP_STATE_SUPPORT_LS;
5936                         pp_state = PP_STATE_LS;
5937                 }
5938                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5939                         pp_support_state |= PP_STATE_SUPPORT_CG;
5940                         pp_state |= PP_STATE_CG;
5941                 }
5942                 if (state == AMD_CG_STATE_UNGATE)
5943                         pp_state = 0;
5944
5945                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5946                                 PP_BLOCK_GFX_CG,
5947                                 pp_support_state,
5948                                 pp_state);
5949                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5950                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5951         }
5952
5953         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5954                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5955                         pp_support_state = PP_STATE_SUPPORT_LS;
5956                         pp_state = PP_STATE_LS;
5957                 }
5958                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5959                         pp_support_state |= PP_STATE_SUPPORT_CG;
5960                         pp_state |= PP_STATE_CG;
5961                 }
5962                 if (state == AMD_CG_STATE_UNGATE)
5963                         pp_state = 0;
5964
5965                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5966                                 PP_BLOCK_GFX_3D,
5967                                 pp_support_state,
5968                                 pp_state);
5969                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5970                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5971         }
5972
5973         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5974                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5975                         pp_support_state = PP_STATE_SUPPORT_LS;
5976                         pp_state = PP_STATE_LS;
5977                 }
5978
5979                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5980                         pp_support_state |= PP_STATE_SUPPORT_CG;
5981                         pp_state |= PP_STATE_CG;
5982                 }
5983
5984                 if (state == AMD_CG_STATE_UNGATE)
5985                         pp_state = 0;
5986
5987                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5988                                 PP_BLOCK_GFX_MG,
5989                                 pp_support_state,
5990                                 pp_state);
5991                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5992                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5993         }
5994
5995         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5996                 pp_support_state = PP_STATE_SUPPORT_LS;
5997
5998                 if (state == AMD_CG_STATE_UNGATE)
5999                         pp_state = 0;
6000                 else
6001                         pp_state = PP_STATE_LS;
6002
6003                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6004                                 PP_BLOCK_GFX_RLC,
6005                                 pp_support_state,
6006                                 pp_state);
6007                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6008                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6009         }
6010
6011         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6012                 pp_support_state = PP_STATE_SUPPORT_LS;
6013
6014                 if (state == AMD_CG_STATE_UNGATE)
6015                         pp_state = 0;
6016                 else
6017                         pp_state = PP_STATE_LS;
6018                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6019                         PP_BLOCK_GFX_CP,
6020                         pp_support_state,
6021                         pp_state);
6022                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6023                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6024         }
6025
6026         return 0;
6027 }
6028
6029 static int gfx_v8_0_set_clockgating_state(void *handle,
6030                                           enum amd_clockgating_state state)
6031 {
6032         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6033
6034         if (amdgpu_sriov_vf(adev))
6035                 return 0;
6036
6037         switch (adev->asic_type) {
6038         case CHIP_FIJI:
6039         case CHIP_CARRIZO:
6040         case CHIP_STONEY:
6041                 gfx_v8_0_update_gfx_clock_gating(adev,
6042                                                  state == AMD_CG_STATE_GATE);
6043                 break;
6044         case CHIP_TONGA:
6045                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6046                 break;
6047         case CHIP_POLARIS10:
6048         case CHIP_POLARIS11:
6049         case CHIP_POLARIS12:
6050         case CHIP_VEGAM:
6051                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6052                 break;
6053         default:
6054                 break;
6055         }
6056         return 0;
6057 }
6058
6059 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6060 {
6061         return ring->adev->wb.wb[ring->rptr_offs];
6062 }
6063
6064 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6065 {
6066         struct amdgpu_device *adev = ring->adev;
6067
6068         if (ring->use_doorbell)
6069                 /* XXX check if swapping is necessary on BE */
6070                 return ring->adev->wb.wb[ring->wptr_offs];
6071         else
6072                 return RREG32(mmCP_RB0_WPTR);
6073 }
6074
/* gfx_v8_0_ring_set_wptr_gfx - publish the GFX ring's write pointer.
 * @ring: the ring whose wptr should be committed to the hardware
 *
 * Doorbell rings mirror the wptr into the writeback slot before ringing
 * the doorbell; register rings write CP_RB0_WPTR and read it back to
 * flush the posted MMIO write.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		/* read back to flush the posted write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6088
/* gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring.
 * @ring: ring to emit the WAIT_REG_MEM packet on
 *
 * Emits a write/wait/write WAIT_REG_MEM that requests an HDP flush and
 * waits for the matching done bit.  The done bit depends on which CP
 * engine (ME/pipe) services the ring; GFX rings use CP0 via the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* Compute/KIQ: pick the per-ME, per-pipe done bit. */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6121
/* gfx_v8_0_ring_emit_vgt_flush - flush the vertex geometry translator.
 * @ring: ring to emit the event packets on
 *
 * Emits a VS partial flush followed by a VGT flush event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6132
/* gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on a GFX ring.
 * @ring: ring to emit on
 * @ib: the indirect buffer to schedule
 * @vmid: VM ID the IB executes under (encoded in the control word)
 * @ctx_switch: unused here (context switch handled elsewhere)
 *
 * CE IBs use INDIRECT_BUFFER_CONST; DE IBs use INDIRECT_BUFFER.  Under
 * SR-IOV, preemptible DE IBs additionally get the preamble-enable bit and
 * DE metadata emitted ahead of them.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6162
/* gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring.
 * @ring: ring to emit on
 * @ib: the indirect buffer to schedule
 * @vmid: VM ID the IB executes under (encoded in the control word)
 * @ctx_switch: unused on compute rings
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6178
/* Emit a fence on the gfx ring: flush caches and write @seq to @addr
 * once prior work completes; optionally raise an interrupt.
 * @flags selects 64bit vs 32bit fence data and interrupt delivery.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL 2 = 64bit seq, 1 = 32bit; INT_SEL 2 enables the interrupt */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6199
/* Emit a pipeline sync: stall the fetcher (PFP on gfx, ME on compute)
 * until the ring's last synced fence value appears at its fence address.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask: all bits */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6216
/* Flush the GPU TLB for @vmid (new page directory at @pd_addr) and wait
 * for the invalidate request to clear.  On gfx rings the PFP is then
 * resynced with the ME so it doesn't prefetch through stale mappings.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6242
6243 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6244 {
6245         return ring->adev->wb.wb[ring->wptr_offs];
6246 }
6247
6248 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6249 {
6250         struct amdgpu_device *adev = ring->adev;
6251
6252         /* XXX check if swapping is necessary on BE */
6253         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6254         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6255 }
6256
6257 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6258                                            bool acquire)
6259 {
6260         struct amdgpu_device *adev = ring->adev;
6261         int pipe_num, tmp, reg;
6262         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6263
6264         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6265
6266         /* first me only has 2 entries, GFX and HP3D */
6267         if (ring->me > 0)
6268                 pipe_num -= 2;
6269
6270         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6271         tmp = RREG32(reg);
6272         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6273         WREG32(reg, tmp);
6274 }
6275
/* Reserve (acquire) or release a pipe for high-priority work.
 *
 * The pipe's bit in pipe_reserve_bitmap tracks active reservations.
 * While any reservation exists, every ring whose pipe is NOT reserved
 * is throttled via SPI_WCL_PIPE_PERCENT; when the last reservation is
 * dropped, all gfx and compute rings get their full share back.
 * Serialized by pipe_reserve_mutex.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6325
6326 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6327                                       struct amdgpu_ring *ring,
6328                                       bool acquire)
6329 {
6330         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6331         uint32_t queue_priority = acquire ? 0xf : 0x0;
6332
6333         mutex_lock(&adev->srbm_mutex);
6334         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6335
6336         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6337         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6338
6339         vi_srbm_select(adev, 0, 0, 0, 0);
6340         mutex_unlock(&adev->srbm_mutex);
6341 }
6342 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6343                                                enum drm_sched_priority priority)
6344 {
6345         struct amdgpu_device *adev = ring->adev;
6346         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6347
6348         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6349                 return;
6350
6351         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6352         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6353 }
6354
/* Emit a fence on a compute ring via RELEASE_MEM: flush caches and
 * write @seq to @addr; optionally raise an interrupt.
 * @flags selects 64bit vs 32bit fence data and interrupt delivery.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL 2 = 64bit seq, 1 = 32bit; INT_SEL 2 enables the interrupt */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6375
/* Emit a KIQ fence: write the 32bit @seq to @addr with WRITE_DATA and,
 * when AMDGPU_FENCE_FLAG_INT is set, poke CPC_INT_STATUS to raise the
 * interrupt.  64bit fences are not supported on the KIQ.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6400
/* Emit a SWITCH_BUFFER packet (payload 0). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6406
/* Emit a CONTEXT_CONTROL packet.  The dw2 payload selects which state
 * groups the CP reloads; on a context switch the VGT is flushed first
 * and the full set of load bits is enabled.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	/* under SR-IOV the CE metadata is emitted ahead of context control */
	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6439
/* Emit a COND_EXEC packet whose dword count is a placeholder to be
 * patched later by gfx_v8_0_ring_emit_patch_cond_exec().
 * Returns the ring offset of the placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6452
/* Patch the COND_EXEC placeholder at @offset (as returned by
 * gfx_v8_0_ring_emit_init_cond_exec) with the number of dwords emitted
 * since, accounting for ring buffer wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* index of the last dword written, in ring units */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring since the placeholder */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6466
/* Emit a COPY_DATA packet that copies register @reg into the writeback
 * area at adev->virt.reg_val_offs, so the host can read it back.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6482
6483 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6484                                   uint32_t val)
6485 {
6486         uint32_t cmd;
6487
6488         switch (ring->funcs->type) {
6489         case AMDGPU_RING_TYPE_GFX:
6490                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6491                 break;
6492         case AMDGPU_RING_TYPE_KIQ:
6493                 cmd = 1 << 16; /* no inc addr */
6494                 break;
6495         default:
6496                 cmd = WR_CONFIRM;
6497                 break;
6498         }
6499
6500         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6501         amdgpu_ring_write(ring, cmd);
6502         amdgpu_ring_write(ring, reg);
6503         amdgpu_ring_write(ring, 0);
6504         amdgpu_ring_write(ring, val);
6505 }
6506
/* Attempt soft recovery of a hung ring by issuing an SQ_CMD targeted at
 * @vmid.  CMD=0x03/MODE=0x01 presumably request a broadcast wave kill
 * for that VMID — confirm against the SQ_CMD register spec.
 */
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
6518
6519 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6520                                                  enum amdgpu_interrupt_state state)
6521 {
6522         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6523                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6524 }
6525
6526 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6527                                                      int me, int pipe,
6528                                                      enum amdgpu_interrupt_state state)
6529 {
6530         u32 mec_int_cntl, mec_int_cntl_reg;
6531
6532         /*
6533          * amdgpu controls only the first MEC. That's why this function only
6534          * handles the setting of interrupts for this specific MEC. All other
6535          * pipes' interrupts are set by amdkfd.
6536          */
6537
6538         if (me == 1) {
6539                 switch (pipe) {
6540                 case 0:
6541                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6542                         break;
6543                 case 1:
6544                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6545                         break;
6546                 case 2:
6547                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6548                         break;
6549                 case 3:
6550                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6551                         break;
6552                 default:
6553                         DRM_DEBUG("invalid pipe %d\n", pipe);
6554                         return;
6555                 }
6556         } else {
6557                 DRM_DEBUG("invalid me %d\n", me);
6558                 return;
6559         }
6560
6561         switch (state) {
6562         case AMDGPU_IRQ_STATE_DISABLE:
6563                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6564                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6565                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6566                 break;
6567         case AMDGPU_IRQ_STATE_ENABLE:
6568                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6569                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6570                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6571                 break;
6572         default:
6573                 break;
6574         }
6575 }
6576
6577 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6578                                              struct amdgpu_irq_src *source,
6579                                              unsigned type,
6580                                              enum amdgpu_interrupt_state state)
6581 {
6582         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6583                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6584
6585         return 0;
6586 }
6587
6588 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6589                                               struct amdgpu_irq_src *source,
6590                                               unsigned type,
6591                                               enum amdgpu_interrupt_state state)
6592 {
6593         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6594                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6595
6596         return 0;
6597 }
6598
/* Dispatch an EOP interrupt enable/disable request to the gfx ring or
 * to the matching MEC/pipe; unknown types are silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6637
6638 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6639                                          struct amdgpu_irq_src *source,
6640                                          unsigned int type,
6641                                          enum amdgpu_interrupt_state state)
6642 {
6643         int enable_flag;
6644
6645         switch (state) {
6646         case AMDGPU_IRQ_STATE_DISABLE:
6647                 enable_flag = 0;
6648                 break;
6649
6650         case AMDGPU_IRQ_STATE_ENABLE:
6651                 enable_flag = 1;
6652                 break;
6653
6654         default:
6655                 return -EINVAL;
6656         }
6657
6658         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6659         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6660         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6661         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6662         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6663         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6664                      enable_flag);
6665         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6666                      enable_flag);
6667         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6668                      enable_flag);
6669         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6670                      enable_flag);
6671         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6672                      enable_flag);
6673         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6674                      enable_flag);
6675         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6676                      enable_flag);
6677         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6678                      enable_flag);
6679
6680         return 0;
6681 }
6682
/* Enable or disable SQ interrupt delivery via SQ_INTERRUPT_MSG_CTRL.
 *
 * Note the inversion relative to the other *_int_state helpers: the
 * field written is STALL, so DISABLE sets it to 1 (stall/suppress SQ
 * interrupt messages) and ENABLE clears it — presumably STALL=1 blocks
 * message delivery; confirm against the register spec.
 */
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
6708
/* CP EOP interrupt handler: decode me/pipe/queue from the IV ring_id
 * and run fence processing on the matching ring.  me 0 is the gfx
 * ring; me 1/2 are searched among the compute rings.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6740
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6749
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6758
/* CP EDC/ECC error interrupt handler: log the event.  No recovery is
 * attempted here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message with a newline per kernel log convention,
	 * so it is not merged with a following printk
	 */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6766
6767 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6768 {
6769         u32 enc, se_id, sh_id, cu_id;
6770         char type[20];
6771         int sq_edc_source = -1;
6772
6773         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6774         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6775
6776         switch (enc) {
6777                 case 0:
6778                         DRM_INFO("SQ general purpose intr detected:"
6779                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6780                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6781                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6782                                         "wlt %d, thread_trace %d.\n",
6783                                         se_id,
6784                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6785                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6786                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6787                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6788                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6789                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6790                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6791                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6792                                         );
6793                         break;
6794                 case 1:
6795                 case 2:
6796
6797                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6798                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6799
6800                         /*
6801                          * This function can be called either directly from ISR
6802                          * or from BH in which case we can access SQ_EDC_INFO
6803                          * instance
6804                          */
6805                         if (in_task()) {
6806                                 mutex_lock(&adev->grbm_idx_mutex);
6807                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6808
6809                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6810
6811                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6812                                 mutex_unlock(&adev->grbm_idx_mutex);
6813                         }
6814
6815                         if (enc == 1)
6816                                 sprintf(type, "instruction intr");
6817                         else
6818                                 sprintf(type, "EDC/ECC error");
6819
6820                         DRM_INFO(
6821                                 "SQ %s detected: "
6822                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6823                                         "trap %s, sq_ed_info.source %s.\n",
6824                                         type, se_id, sh_id, cu_id,
6825                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6826                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6827                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6828                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6829                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6830                                 );
6831                         break;
6832                 default:
6833                         DRM_ERROR("SQ invalid encoding type\n.");
6834         }
6835 }
6836
/* Bottom-half worker for SQ interrupts: parse the saved interrupt data
 * from task context (where SQ_EDC_INFO can be read).  Both container_of
 * calls resolve the same work_struct — once to the owning device, once
 * to the sq_work that carries the saved ih_data.
 */
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{

	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
6845
6846 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6847                            struct amdgpu_irq_src *source,
6848                            struct amdgpu_iv_entry *entry)
6849 {
6850         unsigned ih_data = entry->src_data[0];
6851
6852         /*
6853          * Try to submit work so SQ_EDC_INFO can be accessed from
6854          * BH. If previous work submission hasn't finished yet
6855          * just print whatever info is possible directly from the ISR.
6856          */
6857         if (work_pending(&adev->gfx.sq_work.work)) {
6858                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6859         } else {
6860                 adev->gfx.sq_work.ih_data = ih_data;
6861                 schedule_work(&adev->gfx.sq_work.work);
6862         }
6863
6864         return 0;
6865 }
6866
/*
 * IP-level lifecycle callbacks for the GFX 8.0 block (init/fini, suspend/
 * resume, soft reset, clock/power gating).  Shared by both the v8.0 and
 * v8.1 IP block version descriptors at the end of this file.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6887
/*
 * Ring function table for the GFX (graphics) ring: read/write pointer
 * accessors, packet emission callbacks, and the worst-case dword budget
 * per frame (emit_frame_size) used to reserve ring space.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /*	FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6932
/*
 * Ring function table for the compute rings.  Same packet helpers as the
 * GFX ring where shared, but compute-specific wptr handling, fence
 * emission and ring-space budget.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6962
/*
 * Ring function table for the KIQ ring.  The KIQ uses the compute
 * pointer accessors and IB emission, but its own fence helper and
 * register read/write packets (emit_rreg/emit_wreg).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		/* NOTE(review): no .emit_vm_flush is installed below; this
		 * 17dw entry looks like a leftover budget — confirm. */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6988
6989 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6990 {
6991         int i;
6992
6993         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6994
6995         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6996                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6997
6998         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6999                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7000 }
7001
/* EOP (end of pipe) interrupts from the CP rings. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register access fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* CP ECC error interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

/* SQ interrupts (handled by gfx_v8_0_sq_irq() above). */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7026
7027 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7028 {
7029         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7030         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7031
7032         adev->gfx.priv_reg_irq.num_types = 1;
7033         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7034
7035         adev->gfx.priv_inst_irq.num_types = 1;
7036         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7037
7038         adev->gfx.cp_ecc_error_irq.num_types = 1;
7039         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7040
7041         adev->gfx.sq_irq.num_types = 1;
7042         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7043 }
7044
/* Install the RLC helper callbacks; all gfx v8 parts use the iceland set. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7049
/**
 * gfx_v8_0_set_gds_init - set up default GDS/GWS/OA partitioning
 * @adev: amdgpu device pointer
 *
 * Reads the total GDS memory size from the VMID0 size register, then
 * splits GDS memory, GWS and OA resources between gfx and compute (CS)
 * users.  64KB GDS parts get smaller per-client partitions than larger
 * ones.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7077
7078 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7079                                                  u32 bitmap)
7080 {
7081         u32 data;
7082
7083         if (!bitmap)
7084                 return;
7085
7086         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7087         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7088
7089         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7090 }
7091
7092 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7093 {
7094         u32 data, mask;
7095
7096         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7097                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7098
7099         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7100
7101         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7102 }
7103
/**
 * gfx_v8_0_get_cu_info - collect the compute-unit topology
 * @adev: amdgpu device pointer
 *
 * Walks every shader engine (SE) / shader array (SH), applies any
 * user-requested CU disable masks, and fills adev->gfx.cu_info with the
 * active-CU bitmaps, the "always on" (AO) CU mask and fixed per-CU
 * capability constants.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Fetch user-requested CU disable masks (laid out as 4 SEs x 2 SHs). */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Target register access at this specific SE/SH. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers the first 4 SEs / 2 SHs. */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num become AO CUs. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH for the first 2 SEs/SHs. */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast (all SE/SH) register selection. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7159
/* IP block descriptor for GFX v8.0; uses the shared gfx_v8_0_ip_funcs. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7168
/* IP block descriptor for GFX v8.1; shares the v8.0 function table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7177
7178 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7179 {
7180         uint64_t ce_payload_addr;
7181         int cnt_ce;
7182         union {
7183                 struct vi_ce_ib_state regular;
7184                 struct vi_ce_ib_state_chained_ib chained;
7185         } ce_payload = {};
7186
7187         if (ring->adev->virt.chained_ib_support) {
7188                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7189                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7190                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7191         } else {
7192                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7193                         offsetof(struct vi_gfx_meta_data, ce_payload);
7194                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7195         }
7196
7197         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7198         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7199                                 WRITE_DATA_DST_SEL(8) |
7200                                 WR_CONFIRM) |
7201                                 WRITE_DATA_CACHE_POLICY(0));
7202         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7203         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7204         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7205 }
7206
7207 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7208 {
7209         uint64_t de_payload_addr, gds_addr, csa_addr;
7210         int cnt_de;
7211         union {
7212                 struct vi_de_ib_state regular;
7213                 struct vi_de_ib_state_chained_ib chained;
7214         } de_payload = {};
7215
7216         csa_addr = amdgpu_csa_vaddr(ring->adev);
7217         gds_addr = csa_addr + 4096;
7218         if (ring->adev->virt.chained_ib_support) {
7219                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7220                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7221                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7222                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7223         } else {
7224                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7225                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7226                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7227                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7228         }
7229
7230         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7231         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7232                                 WRITE_DATA_DST_SEL(8) |
7233                                 WR_CONFIRM) |
7234                                 WRITE_DATA_CACHE_POLICY(0));
7235         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7236         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7237         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7238 }