/* drivers/gpu/drm/radeon/si.c -- AMD Southern Islands (SI) asic support */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
/*
 * Expected sizes of the SI microcode images: the PFP/ME(PM4)/CE command
 * processor front ends, the RLC, and the memory controller (MC).
 * NOTE(review): units are presumably dwords per radeon firmware-loader
 * convention -- confirm against the ucode load code before relying on this.
 */
#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769
#define OLAND_MC_UCODE_SIZE 7863

/* Firmware blobs required for each supported Southern Islands asic. */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");

/* Helpers defined in the shared r600/evergreen code paths (other files). */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
72
/*
 * Per-asic "golden" register tables.  Each line appears to be an
 * (offset, mask, value) triplet; the tables are programmed via
 * radeon_program_register_sequence() from si_init_golden_registers()
 * below.  Values come from AMD -- do not edit by hand.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4,
        0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x277c, 0x00000003, 0x000007ff,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x00000200, 0x000002fb,
        0xac10, 0xffffffff, 0x0000543b,
        0xac0c, 0xffffffff, 0xa9210876,
        0x88d0, 0xffffffff, 0x000fff40,
        0x88d4, 0x0000001f, 0x00000010,
        0x1410, 0x20000000, 0x20fffed8,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
        0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601004,
        0xc47c, 0xffffffff, 0x10102020,
        0xc488, 0xffffffff, 0x01000020,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f7,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x32761054,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x033f1005,
        0xc47c, 0xffffffff, 0x10808020,
        0xc488, 0xffffffff, 0x00800008,
        0xc314, 0xffffffff, 0x00001000,
        0xc30c, 0xffffffff, 0x80010014
};

/* NOTE(review): several entries below are repeated verbatim (e.g. 0x2ae4,
 * 0x240c); this matches the vendor-supplied sequence and is kept as-is. */
static const u32 verde_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
267
/*
 * Per-asic MGCG/CGCG (presumably medium/coarse-grain clock gating --
 * AMD terminology, confirm against the SI register spec) init sequences.
 * Same (offset, mask, value) triplet layout as the golden tables above;
 * vendor-supplied values, do not edit by hand.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x91d8, 0xffffffff, 0x00070006,
        0x91dc, 0xffffffff, 0x00090008,
        0x91e0, 0xffffffff, 0x0000000c,
        0x91e4, 0xffffffff, 0x000b000a,
        0x91e8, 0xffffffff, 0x000e000d,
        0x91ec, 0xffffffff, 0x00080007,
        0x91f0, 0xffffffff, 0x000a0009,
        0x91f4, 0xffffffff, 0x0000000d,
        0x91f8, 0xffffffff, 0x000c000b,
        0x91fc, 0xffffffff, 0x000f000e,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9264, 0xffffffff, 0x000e000d,
        0x9268, 0xffffffff, 0x0010000f,
        0x926c, 0xffffffff, 0x00000013,
        0x9270, 0xffffffff, 0x00120011,
        0x9274, 0xffffffff, 0x00150014,
        0x9278, 0xffffffff, 0x000f000e,
        0x927c, 0xffffffff, 0x00110010,
        0x9280, 0xffffffff, 0x00000014,
        0x9284, 0xffffffff, 0x00130012,
        0x9288, 0xffffffff, 0x00160015,
        0x928c, 0xffffffff, 0x0010000f,
        0x9290, 0xffffffff, 0x00120011,
        0x9294, 0xffffffff, 0x00000015,
        0x9298, 0xffffffff, 0x00140013,
        0x929c, 0xffffffff, 0x00170016,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
675
/*
 * Verde power-gating init sequence, same (offset, mask, value) triplet
 * layout as the tables above.  Vendor-supplied values; do not edit by hand.
 *
 * Fix: declared const to match every other register table in this file
 * (the sequence is only ever read), moving it to read-only data.
 */
static const u32 verde_pg_init[] =
{
        0x353c, 0xffffffff, 0x40000,
        0x3538, 0xffffffff, 0x200010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x7007,
        0x3538, 0xffffffff, 0x300010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x400000,
        0x3538, 0xffffffff, 0x100010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x120200,
        0x3538, 0xffffffff, 0x500010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x1e1e16,
        0x3538, 0xffffffff, 0x600010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x171f1e,
        0x3538, 0xffffffff, 0x700010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x3538, 0xffffffff, 0x9ff,
        0x3500, 0xffffffff, 0x0,
        0x3504, 0xffffffff, 0x10000800,
        0x3504, 0xffffffff, 0xf,
        0x3504, 0xffffffff, 0xf,
        0x3500, 0xffffffff, 0x4,
        0x3504, 0xffffffff, 0x1000051e,
        0x3504, 0xffffffff, 0xffff,
        0x3504, 0xffffffff, 0xffff,
        0x3500, 0xffffffff, 0x8,
        0x3504, 0xffffffff, 0x80500,
        0x3500, 0xffffffff, 0x12,
        0x3504, 0xffffffff, 0x9050c,
        0x3500, 0xffffffff, 0x1d,
        0x3504, 0xffffffff, 0xb052c,
        0x3500, 0xffffffff, 0x2a,
        0x3504, 0xffffffff, 0x1053e,
        0x3500, 0xffffffff, 0x2d,
        0x3504, 0xffffffff, 0x10546,
        0x3500, 0xffffffff, 0x30,
        0x3504, 0xffffffff, 0xa054e,
        0x3500, 0xffffffff, 0x3c,
        0x3504, 0xffffffff, 0x1055f,
        0x3500, 0xffffffff, 0x3f,
        0x3504, 0xffffffff, 0x10567,
        0x3500, 0xffffffff, 0x42,
        0x3504, 0xffffffff, 0x1056f,
        0x3500, 0xffffffff, 0x45,
        0x3504, 0xffffffff, 0x10572,
        0x3500, 0xffffffff, 0x48,
        0x3504, 0xffffffff, 0x20575,
        0x3500, 0xffffffff, 0x4c,
        0x3504, 0xffffffff, 0x190801,
        0x3500, 0xffffffff, 0x67,
        0x3504, 0xffffffff, 0x1082a,
        0x3500, 0xffffffff, 0x6a,
        0x3504, 0xffffffff, 0x1b082d,
        0x3500, 0xffffffff, 0x87,
        0x3504, 0xffffffff, 0x310851,
        0x3500, 0xffffffff, 0xba,
        0x3504, 0xffffffff, 0x891,
        0x3500, 0xffffffff, 0xbc,
        0x3504, 0xffffffff, 0x893,
        0x3500, 0xffffffff, 0xbe,
        0x3504, 0xffffffff, 0x20895,
        0x3500, 0xffffffff, 0xc2,
        0x3504, 0xffffffff, 0x20899,
        0x3500, 0xffffffff, 0xc6,
        0x3504, 0xffffffff, 0x2089d,
        0x3500, 0xffffffff, 0xca,
        0x3504, 0xffffffff, 0x8a1,
        0x3500, 0xffffffff, 0xcc,
        0x3504, 0xffffffff, 0x8a3,
        0x3500, 0xffffffff, 0xce,
        0x3504, 0xffffffff, 0x308a5,
        0x3500, 0xffffffff, 0xd3,
        0x3504, 0xffffffff, 0x6d08cd,
        0x3500, 0xffffffff, 0x142,
        0x3504, 0xffffffff, 0x2000095a,
        0x3504, 0xffffffff, 0x1,
        0x3500, 0xffffffff, 0x144,
        0x3504, 0xffffffff, 0x301f095b,
        0x3500, 0xffffffff, 0x165,
        0x3504, 0xffffffff, 0xc094d,
        0x3500, 0xffffffff, 0x173,
        0x3504, 0xffffffff, 0xf096d,
        0x3500, 0xffffffff, 0x184,
        0x3504, 0xffffffff, 0x15097f,
        0x3500, 0xffffffff, 0x19b,
        0x3504, 0xffffffff, 0xc0998,
        0x3500, 0xffffffff, 0x1a9,
        0x3504, 0xffffffff, 0x409a7,
        0x3500, 0xffffffff, 0x1af,
        0x3504, 0xffffffff, 0xcdc,
        0x3500, 0xffffffff, 0x1b1,
        0x3504, 0xffffffff, 0x800,
        0x3508, 0xffffffff, 0x6c9b2000,
        0x3510, 0xfc00, 0x2000,
        0x3544, 0xffffffff, 0xfc0,
        0x28d4, 0x00000100, 0x100
};
802
803 static void si_init_golden_registers(struct radeon_device *rdev)
804 {
805         switch (rdev->family) {
806         case CHIP_TAHITI:
807                 radeon_program_register_sequence(rdev,
808                                                  tahiti_golden_registers,
809                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
810                 radeon_program_register_sequence(rdev,
811                                                  tahiti_golden_rlc_registers,
812                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
813                 radeon_program_register_sequence(rdev,
814                                                  tahiti_mgcg_cgcg_init,
815                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
816                 radeon_program_register_sequence(rdev,
817                                                  tahiti_golden_registers2,
818                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
819                 break;
820         case CHIP_PITCAIRN:
821                 radeon_program_register_sequence(rdev,
822                                                  pitcairn_golden_registers,
823                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
824                 radeon_program_register_sequence(rdev,
825                                                  pitcairn_golden_rlc_registers,
826                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
827                 radeon_program_register_sequence(rdev,
828                                                  pitcairn_mgcg_cgcg_init,
829                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
830                 break;
831         case CHIP_VERDE:
832                 radeon_program_register_sequence(rdev,
833                                                  verde_golden_registers,
834                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
835                 radeon_program_register_sequence(rdev,
836                                                  verde_golden_rlc_registers,
837                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
838                 radeon_program_register_sequence(rdev,
839                                                  verde_mgcg_cgcg_init,
840                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
841                 radeon_program_register_sequence(rdev,
842                                                  verde_pg_init,
843                                                  (const u32)ARRAY_SIZE(verde_pg_init));
844                 break;
845         case CHIP_OLAND:
846                 radeon_program_register_sequence(rdev,
847                                                  oland_golden_registers,
848                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
849                 radeon_program_register_sequence(rdev,
850                                                  oland_golden_rlc_registers,
851                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
852                 radeon_program_register_sequence(rdev,
853                                                  oland_mgcg_cgcg_init,
854                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
855                 break;
856         default:
857                 break;
858         }
859 }
860
861 #define PCIE_BUS_CLK                10000
862 #define TCLK                        (PCIE_BUS_CLK / 10)
863
864 /**
865  * si_get_xclk - get the xclk
866  *
867  * @rdev: radeon_device pointer
868  *
869  * Returns the reference clock used by the gfx engine
870  * (SI).
871  */
872 u32 si_get_xclk(struct radeon_device *rdev)
873 {
874         u32 reference_clock = rdev->clock.spll.reference_freq;
875         u32 tmp;
876
877         tmp = RREG32(CG_CLKPIN_CNTL_2);
878         if (tmp & MUX_TCLK_TO_XCLK)
879                 return TCLK;
880
881         tmp = RREG32(CG_CLKPIN_CNTL);
882         if (tmp & XTALIN_DIVIDE)
883                 return reference_clock / 4;
884
885         return reference_clock;
886 }
887
888 /* get temperature in millidegrees */
889 int si_get_temp(struct radeon_device *rdev)
890 {
891         u32 temp;
892         int actual_temp = 0;
893
894         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
895                 CTF_TEMP_SHIFT;
896
897         if (temp & 0x200)
898                 actual_temp = 255;
899         else
900                 actual_temp = temp & 0x1ff;
901
902         actual_temp = (actual_temp * 1000);
903
904         return actual_temp;
905 }
906
#define TAHITI_IO_MC_REGS_SIZE 36

/* (index, value) pairs written to MC_SEQ_IO_DEBUG_INDEX/DATA before the
 * MC ucode is loaded in si_mc_load_microcode() (Tahiti)
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
947
/* MC_SEQ_IO_DEBUG (index, value) init pairs for Pitcairn; differs from
 * the Tahiti table only in the final 0x9f entry
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
986
/* MC_SEQ_IO_DEBUG (index, value) init pairs for Verde; differs from
 * the Tahiti table only in the final 0x9f entry
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1025
/* MC_SEQ_IO_DEBUG (index, value) init pairs for Oland; differs from
 * the Tahiti table only in the final 0x9f entry
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1064
1065 /* ucode loading */
1066 static int si_mc_load_microcode(struct radeon_device *rdev)
1067 {
1068         const __be32 *fw_data;
1069         u32 running, blackout = 0;
1070         u32 *io_mc_regs;
1071         int i, ucode_size, regs_size;
1072
1073         if (!rdev->mc_fw)
1074                 return -EINVAL;
1075
1076         switch (rdev->family) {
1077         case CHIP_TAHITI:
1078                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1079                 ucode_size = SI_MC_UCODE_SIZE;
1080                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1081                 break;
1082         case CHIP_PITCAIRN:
1083                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1084                 ucode_size = SI_MC_UCODE_SIZE;
1085                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1086                 break;
1087         case CHIP_VERDE:
1088         default:
1089                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1090                 ucode_size = SI_MC_UCODE_SIZE;
1091                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1092                 break;
1093         case CHIP_OLAND:
1094                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1095                 ucode_size = OLAND_MC_UCODE_SIZE;
1096                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1097                 break;
1098         }
1099
1100         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1101
1102         if (running == 0) {
1103                 if (running) {
1104                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1105                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1106                 }
1107
1108                 /* reset the engine and set to writable */
1109                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1110                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1111
1112                 /* load mc io regs */
1113                 for (i = 0; i < regs_size; i++) {
1114                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1115                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1116                 }
1117                 /* load the MC ucode */
1118                 fw_data = (const __be32 *)rdev->mc_fw->data;
1119                 for (i = 0; i < ucode_size; i++)
1120                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1121
1122                 /* put the engine back into the active state */
1123                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1124                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1125                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1126
1127                 /* wait for training to complete */
1128                 for (i = 0; i < rdev->usec_timeout; i++) {
1129                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1130                                 break;
1131                         udelay(1);
1132                 }
1133                 for (i = 0; i < rdev->usec_timeout; i++) {
1134                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1135                                 break;
1136                         udelay(1);
1137                 }
1138
1139                 if (running)
1140                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1141         }
1142
1143         return 0;
1144 }
1145
1146 static int si_init_microcode(struct radeon_device *rdev)
1147 {
1148         struct platform_device *pdev;
1149         const char *chip_name;
1150         const char *rlc_chip_name;
1151         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1152         char fw_name[30];
1153         int err;
1154
1155         DRM_DEBUG("\n");
1156
1157         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1158         err = IS_ERR(pdev);
1159         if (err) {
1160                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1161                 return -EINVAL;
1162         }
1163
1164         switch (rdev->family) {
1165         case CHIP_TAHITI:
1166                 chip_name = "TAHITI";
1167                 rlc_chip_name = "TAHITI";
1168                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1169                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1170                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1171                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1172                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1173                 break;
1174         case CHIP_PITCAIRN:
1175                 chip_name = "PITCAIRN";
1176                 rlc_chip_name = "PITCAIRN";
1177                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1178                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1179                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1180                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1181                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1182                 break;
1183         case CHIP_VERDE:
1184                 chip_name = "VERDE";
1185                 rlc_chip_name = "VERDE";
1186                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1187                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1188                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1189                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1190                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1191                 break;
1192         case CHIP_OLAND:
1193                 chip_name = "OLAND";
1194                 rlc_chip_name = "OLAND";
1195                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1196                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1197                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1198                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1199                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1200                 break;
1201         default: BUG();
1202         }
1203
1204         DRM_INFO("Loading %s Microcode\n", chip_name);
1205
1206         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1207         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1208         if (err)
1209                 goto out;
1210         if (rdev->pfp_fw->size != pfp_req_size) {
1211                 printk(KERN_ERR
1212                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1213                        rdev->pfp_fw->size, fw_name);
1214                 err = -EINVAL;
1215                 goto out;
1216         }
1217
1218         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1219         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1220         if (err)
1221                 goto out;
1222         if (rdev->me_fw->size != me_req_size) {
1223                 printk(KERN_ERR
1224                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1225                        rdev->me_fw->size, fw_name);
1226                 err = -EINVAL;
1227         }
1228
1229         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1230         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1231         if (err)
1232                 goto out;
1233         if (rdev->ce_fw->size != ce_req_size) {
1234                 printk(KERN_ERR
1235                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1236                        rdev->ce_fw->size, fw_name);
1237                 err = -EINVAL;
1238         }
1239
1240         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1241         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1242         if (err)
1243                 goto out;
1244         if (rdev->rlc_fw->size != rlc_req_size) {
1245                 printk(KERN_ERR
1246                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1247                        rdev->rlc_fw->size, fw_name);
1248                 err = -EINVAL;
1249         }
1250
1251         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1252         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1253         if (err)
1254                 goto out;
1255         if (rdev->mc_fw->size != mc_req_size) {
1256                 printk(KERN_ERR
1257                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1258                        rdev->mc_fw->size, fw_name);
1259                 err = -EINVAL;
1260         }
1261
1262 out:
1263         platform_device_unregister(pdev);
1264
1265         if (err) {
1266                 if (err != -EINVAL)
1267                         printk(KERN_ERR
1268                                "si_cp: Failed to load firmware \"%s\"\n",
1269                                fw_name);
1270                 release_firmware(rdev->pfp_fw);
1271                 rdev->pfp_fw = NULL;
1272                 release_firmware(rdev->me_fw);
1273                 rdev->me_fw = NULL;
1274                 release_firmware(rdev->ce_fw);
1275                 rdev->ce_fw = NULL;
1276                 release_firmware(rdev->rlc_fw);
1277                 rdev->rlc_fw = NULL;
1278                 release_firmware(rdev->mc_fw);
1279                 rdev->mc_fw = NULL;
1280         }
1281         return err;
1282 }
1283
1284 /* watermark setup */
1285 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1286                                    struct radeon_crtc *radeon_crtc,
1287                                    struct drm_display_mode *mode,
1288                                    struct drm_display_mode *other_mode)
1289 {
1290         u32 tmp;
1291         /*
1292          * Line Buffer Setup
1293          * There are 3 line buffers, each one shared by 2 display controllers.
1294          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1295          * the display controllers.  The paritioning is done via one of four
1296          * preset allocations specified in bits 21:20:
1297          *  0 - half lb
1298          *  2 - whole lb, other crtc must be disabled
1299          */
1300         /* this can get tricky if we have two large displays on a paired group
1301          * of crtcs.  Ideally for multiple large displays we'd assign them to
1302          * non-linked crtcs for maximum line buffer allocation.
1303          */
1304         if (radeon_crtc->base.enabled && mode) {
1305                 if (other_mode)
1306                         tmp = 0; /* 1/2 */
1307                 else
1308                         tmp = 2; /* whole */
1309         } else
1310                 tmp = 0;
1311
1312         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1313                DC_LB_MEMORY_CONFIG(tmp));
1314
1315         if (radeon_crtc->base.enabled && mode) {
1316                 switch (tmp) {
1317                 case 0:
1318                 default:
1319                         return 4096 * 2;
1320                 case 2:
1321                         return 8192 * 2;
1322                 }
1323         }
1324
1325         /* controller not enabled, so no lb used */
1326         return 0;
1327 }
1328
1329 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1330 {
1331         u32 tmp = RREG32(MC_SHARED_CHMAP);
1332
1333         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1334         case 0:
1335         default:
1336                 return 1;
1337         case 1:
1338                 return 2;
1339         case 2:
1340                 return 4;
1341         case 3:
1342                 return 8;
1343         case 4:
1344                 return 3;
1345         case 5:
1346                 return 6;
1347         case 6:
1348                 return 10;
1349         case 7:
1350                 return 12;
1351         case 8:
1352                 return 16;
1353         }
1354 }
1355
/* input parameters for the DCE6 display watermark calculations below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1371
1372 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1373 {
1374         /* Calculate raw DRAM Bandwidth */
1375         fixed20_12 dram_efficiency; /* 0.7 */
1376         fixed20_12 yclk, dram_channels, bandwidth;
1377         fixed20_12 a;
1378
1379         a.full = dfixed_const(1000);
1380         yclk.full = dfixed_const(wm->yclk);
1381         yclk.full = dfixed_div(yclk, a);
1382         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1383         a.full = dfixed_const(10);
1384         dram_efficiency.full = dfixed_const(7);
1385         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1386         bandwidth.full = dfixed_mul(dram_channels, yclk);
1387         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1388
1389         return dfixed_trunc(bandwidth);
1390 }
1391
1392 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1393 {
1394         /* Calculate DRAM Bandwidth and the part allocated to display. */
1395         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1396         fixed20_12 yclk, dram_channels, bandwidth;
1397         fixed20_12 a;
1398
1399         a.full = dfixed_const(1000);
1400         yclk.full = dfixed_const(wm->yclk);
1401         yclk.full = dfixed_div(yclk, a);
1402         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1403         a.full = dfixed_const(10);
1404         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1405         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1406         bandwidth.full = dfixed_mul(dram_channels, yclk);
1407         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1408
1409         return dfixed_trunc(bandwidth);
1410 }
1411
1412 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1413 {
1414         /* Calculate the display Data return Bandwidth */
1415         fixed20_12 return_efficiency; /* 0.8 */
1416         fixed20_12 sclk, bandwidth;
1417         fixed20_12 a;
1418
1419         a.full = dfixed_const(1000);
1420         sclk.full = dfixed_const(wm->sclk);
1421         sclk.full = dfixed_div(sclk, a);
1422         a.full = dfixed_const(10);
1423         return_efficiency.full = dfixed_const(8);
1424         return_efficiency.full = dfixed_div(return_efficiency, a);
1425         a.full = dfixed_const(32);
1426         bandwidth.full = dfixed_mul(a, sclk);
1427         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1428
1429         return dfixed_trunc(bandwidth);
1430 }
1431
/* bytes per DMIF request; a fixed constant in these calculations */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1436
1437 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1438 {
1439         /* Calculate the DMIF Request Bandwidth */
1440         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1441         fixed20_12 disp_clk, sclk, bandwidth;
1442         fixed20_12 a, b1, b2;
1443         u32 min_bandwidth;
1444
1445         a.full = dfixed_const(1000);
1446         disp_clk.full = dfixed_const(wm->disp_clk);
1447         disp_clk.full = dfixed_div(disp_clk, a);
1448         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1449         b1.full = dfixed_mul(a, disp_clk);
1450
1451         a.full = dfixed_const(1000);
1452         sclk.full = dfixed_const(wm->sclk);
1453         sclk.full = dfixed_div(sclk, a);
1454         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1455         b2.full = dfixed_mul(a, sclk);
1456
1457         a.full = dfixed_const(10);
1458         disp_clk_request_efficiency.full = dfixed_const(8);
1459         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1460
1461         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1462
1463         a.full = dfixed_const(min_bandwidth);
1464         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1465
1466         return dfixed_trunc(bandwidth);
1467 }
1468
1469 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1470 {
1471         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1472         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1473         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1474         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1475
1476         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1477 }
1478
1479 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1480 {
1481         /* Calculate the display mode Average Bandwidth
1482          * DisplayMode should contain the source and destination dimensions,
1483          * timing, etc.
1484          */
1485         fixed20_12 bpp;
1486         fixed20_12 line_time;
1487         fixed20_12 src_width;
1488         fixed20_12 bandwidth;
1489         fixed20_12 a;
1490
1491         a.full = dfixed_const(1000);
1492         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1493         line_time.full = dfixed_div(line_time, a);
1494         bpp.full = dfixed_const(wm->bytes_per_pixel);
1495         src_width.full = dfixed_const(wm->src_width);
1496         bandwidth.full = dfixed_mul(src_width, bpp);
1497         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1498         bandwidth.full = dfixed_div(bandwidth, line_time);
1499
1500         return dfixed_trunc(bandwidth);
1501 }
1502
/* Compute the latency watermark for a head: the worst-case latency the
 * line buffer must hide, extended if the lb cannot refill within the
 * active display time.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time to return one 512x8 chunk at the available bandwidth */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time spent serving the other heads' chunk + cursor requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* high downscale ratios, many scaler taps or interlaced scaling
	 * need up to 4 source lines per destination line, otherwise 2
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif-limited fill rate: dmif_size / (mc latency + 512 clks) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* display-clock-limited fill rate: disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smallest of the three limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case number of source lines */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the lb refills within the active time the raw latency is the
	 * watermark; otherwise add the overrun
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1565
1566 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1567 {
1568         if (dce6_average_bandwidth(wm) <=
1569             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1570                 return true;
1571         else
1572                 return false;
1573 };
1574
1575 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1576 {
1577         if (dce6_average_bandwidth(wm) <=
1578             (dce6_available_bandwidth(wm) / wm->num_heads))
1579                 return true;
1580         else
1581                 return false;
1582 };
1583
1584 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1585 {
1586         u32 lb_partitions = wm->lb_size / wm->src_width;
1587         u32 line_time = wm->active_time + wm->blank_time;
1588         u32 latency_tolerant_lines;
1589         u32 latency_hiding;
1590         fixed20_12 a;
1591
1592         a.full = dfixed_const(1);
1593         if (wm->vsc.full > a.full)
1594                 latency_tolerant_lines = 1;
1595         else {
1596                 if (lb_partitions <= (wm->vtaps + 1))
1597                         latency_tolerant_lines = 1;
1598                 else
1599                         latency_tolerant_lines = 2;
1600         }
1601
1602         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1603
1604         if (dce6_latency_watermark(wm) <= latency_hiding)
1605                 return true;
1606         else
1607                 return false;
1608 }
1609
/**
 * dce6_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size for this controller
 * @num_heads: number of active display heads
 *
 * Calculate the latency watermarks and priority marks for the
 * selected display controller and program them into the DPG/
 * PRIORITY registers (DCE6).  If the crtc is disabled, the
 * priority counters are programmed to PRIORITY_OFF and both
 * watermarks are left at 0.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns (mode->clock is in kHz) */
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		/* ARUBA (TN) uses the evergreen dram channel query */
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		/* NOTE(review): wm is not actually updated with low clocks
		 * here, so watermark b is computed from the same parameters
		 * as watermark a and the two are always equal. */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A =
		 * (disp_clk / 1000) * latency_watermark_a * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with latency_watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1719
1720 void dce6_bandwidth_update(struct radeon_device *rdev)
1721 {
1722         struct drm_display_mode *mode0 = NULL;
1723         struct drm_display_mode *mode1 = NULL;
1724         u32 num_heads = 0, lb_size;
1725         int i;
1726
1727         radeon_update_display_priority(rdev);
1728
1729         for (i = 0; i < rdev->num_crtc; i++) {
1730                 if (rdev->mode_info.crtcs[i]->base.enabled)
1731                         num_heads++;
1732         }
1733         for (i = 0; i < rdev->num_crtc; i += 2) {
1734                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1735                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1736                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1737                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1738                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1739                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1740         }
1741 }
1742
1743 /*
1744  * Core functions
1745  */
1746 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1747 {
1748         const u32 num_tile_mode_states = 32;
1749         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1750
1751         switch (rdev->config.si.mem_row_size_in_kb) {
1752         case 1:
1753                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754                 break;
1755         case 2:
1756         default:
1757                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758                 break;
1759         case 4:
1760                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761                 break;
1762         }
1763
1764         if ((rdev->family == CHIP_TAHITI) ||
1765             (rdev->family == CHIP_PITCAIRN)) {
1766                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767                         switch (reg_offset) {
1768                         case 0:  /* non-AA compressed depth or any compressed stencil */
1769                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1771                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1772                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1773                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1774                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1777                                 break;
1778                         case 1:  /* 2xAA/4xAA compressed depth only */
1779                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1781                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1782                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1783                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1784                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1785                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1787                                 break;
1788                         case 2:  /* 8xAA compressed depth only */
1789                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1791                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1792                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1793                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1794                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1797                                 break;
1798                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1799                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1800                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1801                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1802                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1803                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1804                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1807                                 break;
1808                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1809                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1810                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1811                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1812                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1813                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1814                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1815                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1816                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1817                                 break;
1818                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1820                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1821                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1822                                                  TILE_SPLIT(split_equal_to_row_size) |
1823                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1824                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1827                                 break;
1828                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1831                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832                                                  TILE_SPLIT(split_equal_to_row_size) |
1833                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1834                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1837                                 break;
1838                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1839                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1840                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1841                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1842                                                  TILE_SPLIT(split_equal_to_row_size) |
1843                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1844                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1845                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1846                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1847                                 break;
1848                         case 8:  /* 1D and 1D Array Surfaces */
1849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1850                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1851                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1852                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1854                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1855                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1856                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1857                                 break;
1858                         case 9:  /* Displayable maps. */
1859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1860                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1861                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1862                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1863                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1864                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1866                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1867                                 break;
1868                         case 10:  /* Display 8bpp. */
1869                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1871                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1872                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1873                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1874                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1875                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1876                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1877                                 break;
1878                         case 11:  /* Display 16bpp. */
1879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1880                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1881                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1882                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1883                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1884                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1885                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1886                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1887                                 break;
1888                         case 12:  /* Display 32bpp. */
1889                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1890                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1892                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1893                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1894                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1897                                 break;
1898                         case 13:  /* Thin. */
1899                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1900                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1901                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1902                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1903                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1904                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1906                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1907                                 break;
1908                         case 14:  /* Thin 8 bpp. */
1909                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1910                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1911                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1912                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1913                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1914                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1917                                 break;
1918                         case 15:  /* Thin 16 bpp. */
1919                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1920                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1921                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1922                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1923                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1924                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1926                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1927                                 break;
1928                         case 16:  /* Thin 32 bpp. */
1929                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1930                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1931                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1932                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1933                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1934                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1936                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1937                                 break;
1938                         case 17:  /* Thin 64 bpp. */
1939                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1941                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1942                                                  TILE_SPLIT(split_equal_to_row_size) |
1943                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1944                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1947                                 break;
1948                         case 21:  /* 8 bpp PRT. */
1949                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1950                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1951                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1952                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1953                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1954                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1957                                 break;
1958                         case 22:  /* 16 bpp PRT */
1959                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1960                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1961                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1962                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1963                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1964                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1967                                 break;
1968                         case 23:  /* 32 bpp PRT */
1969                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1971                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1972                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1973                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1974                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1977                                 break;
1978                         case 24:  /* 64 bpp PRT */
1979                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1981                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1982                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1983                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1984                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1986                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1987                                 break;
1988                         case 25:  /* 128 bpp PRT */
1989                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1991                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1992                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1993                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
1994                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1995                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1996                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1997                                 break;
1998                         default:
1999                                 gb_tile_moden = 0;
2000                                 break;
2001                         }
2002                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2003                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2004                 }
2005         } else if ((rdev->family == CHIP_VERDE) ||
2006                    (rdev->family == CHIP_OLAND)) {
2007                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2008                         switch (reg_offset) {
2009                         case 0:  /* non-AA compressed depth or any compressed stencil */
2010                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2011                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2012                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2013                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2014                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2015                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2016                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2017                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2018                                 break;
2019                         case 1:  /* 2xAA/4xAA compressed depth only */
2020                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2021                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2022                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2023                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2024                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2025                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2027                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2028                                 break;
2029                         case 2:  /* 8xAA compressed depth only */
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2032                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2033                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2034                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2035                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2036                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2037                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2038                                 break;
2039                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2043                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2044                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2045                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2046                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2047                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2048                                 break;
2049                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2052                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2053                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2054                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2055                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2056                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2057                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2058                                 break;
2059                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2063                                                  TILE_SPLIT(split_equal_to_row_size) |
2064                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2065                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2066                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2067                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2068                                 break;
2069                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2070                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2071                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2072                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073                                                  TILE_SPLIT(split_equal_to_row_size) |
2074                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2075                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2076                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2077                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2078                                 break;
2079                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2080                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2081                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2082                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2083                                                  TILE_SPLIT(split_equal_to_row_size) |
2084                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2085                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2086                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2087                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2088                                 break;
2089                         case 8:  /* 1D and 1D Array Surfaces */
2090                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2091                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2092                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2093                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2094                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2095                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2096                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2097                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2098                                 break;
2099                         case 9:  /* Displayable maps. */
2100                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2101                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2102                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2103                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2104                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2105                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2108                                 break;
2109                         case 10:  /* Display 8bpp. */
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2113                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2114                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2115                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2116                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2117                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2118                                 break;
2119                         case 11:  /* Display 16bpp. */
2120                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2122                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2124                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2125                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2128                                 break;
2129                         case 12:  /* Display 32bpp. */
2130                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2132                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2133                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2135                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2138                                 break;
2139                         case 13:  /* Thin. */
2140                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2142                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2143                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2144                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2145                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2147                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2148                                 break;
2149                         case 14:  /* Thin 8 bpp. */
2150                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2152                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2153                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2155                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2158                                 break;
2159                         case 15:  /* Thin 16 bpp. */
2160                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2162                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2163                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2164                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2165                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2167                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2168                                 break;
2169                         case 16:  /* Thin 32 bpp. */
2170                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2172                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2173                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2174                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2175                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2176                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2177                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2178                                 break;
2179                         case 17:  /* Thin 64 bpp. */
2180                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2182                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183                                                  TILE_SPLIT(split_equal_to_row_size) |
2184                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2185                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2188                                 break;
2189                         case 21:  /* 8 bpp PRT. */
2190                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2192                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2193                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2194                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2195                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2196                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2197                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2198                                 break;
2199                         case 22:  /* 16 bpp PRT */
2200                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2201                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2202                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2203                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2204                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2205                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2207                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2208                                 break;
2209                         case 23:  /* 32 bpp PRT */
2210                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2212                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2213                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2214                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2215                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2216                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2217                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2218                                 break;
2219                         case 24:  /* 64 bpp PRT */
2220                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2222                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2223                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2224                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2225                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2228                                 break;
2229                         case 25:  /* 128 bpp PRT */
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2234                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2235                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2237                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2238                                 break;
2239                         default:
2240                                 gb_tile_moden = 0;
2241                                 break;
2242                         }
2243                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2244                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2245                 }
2246         } else
2247                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2248 }
2249
2250 static void si_select_se_sh(struct radeon_device *rdev,
2251                             u32 se_num, u32 sh_num)
2252 {
2253         u32 data = INSTANCE_BROADCAST_WRITES;
2254
2255         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2256                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2257         else if (se_num == 0xffffffff)
2258                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2259         else if (sh_num == 0xffffffff)
2260                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2261         else
2262                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2263         WREG32(GRBM_GFX_INDEX, data);
2264 }
2265
2266 static u32 si_create_bitmask(u32 bit_width)
2267 {
2268         u32 i, mask = 0;
2269
2270         for (i = 0; i < bit_width; i++) {
2271                 mask <<= 1;
2272                 mask |= 1;
2273         }
2274         return mask;
2275 }
2276
2277 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2278 {
2279         u32 data, mask;
2280
2281         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2282         if (data & 1)
2283                 data &= INACTIVE_CUS_MASK;
2284         else
2285                 data = 0;
2286         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2287
2288         data >>= INACTIVE_CUS_SHIFT;
2289
2290         mask = si_create_bitmask(cu_per_sh);
2291
2292         return ~data & mask;
2293 }
2294
2295 static void si_setup_spi(struct radeon_device *rdev,
2296                          u32 se_num, u32 sh_per_se,
2297                          u32 cu_per_sh)
2298 {
2299         int i, j, k;
2300         u32 data, mask, active_cu;
2301
2302         for (i = 0; i < se_num; i++) {
2303                 for (j = 0; j < sh_per_se; j++) {
2304                         si_select_se_sh(rdev, i, j);
2305                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2306                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2307
2308                         mask = 1;
2309                         for (k = 0; k < 16; k++) {
2310                                 mask <<= k;
2311                                 if (active_cu & mask) {
2312                                         data &= ~mask;
2313                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2314                                         break;
2315                                 }
2316                         }
2317                 }
2318         }
2319         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2320 }
2321
2322 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2323                               u32 max_rb_num, u32 se_num,
2324                               u32 sh_per_se)
2325 {
2326         u32 data, mask;
2327
2328         data = RREG32(CC_RB_BACKEND_DISABLE);
2329         if (data & 1)
2330                 data &= BACKEND_DISABLE_MASK;
2331         else
2332                 data = 0;
2333         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2334
2335         data >>= BACKEND_DISABLE_SHIFT;
2336
2337         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2338
2339         return data & mask;
2340 }
2341
/*
 * si_setup_rb - program the render backend (RB) raster configuration
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays (SH) per shader engine
 * @max_rb_num: total number of render backends on the chip
 *
 * Gathers the per-SE/SH backend-disable bits into a chip-wide bitmap,
 * inverts it into an enabled-RB bitmap, and writes a matching
 * PA_SC_RASTER_CONFIG for each shader engine. Restores broadcast
 * indexing on exit.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect each SE/SH's disable bits into one chip-wide bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert the disabled bitmap to get the enabled-RB bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* per SE: translate each 2-bit slice of the enabled-RB bitmap
	 * (consumed low bits first via the >>= 2 below) into an RB_MAP
	 * field of that SE's raster config
	 */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2389
2390 static void si_gpu_init(struct radeon_device *rdev)
2391 {
2392         u32 gb_addr_config = 0;
2393         u32 mc_shared_chmap, mc_arb_ramcfg;
2394         u32 sx_debug_1;
2395         u32 hdp_host_path_cntl;
2396         u32 tmp;
2397         int i, j;
2398
2399         switch (rdev->family) {
2400         case CHIP_TAHITI:
2401                 rdev->config.si.max_shader_engines = 2;
2402                 rdev->config.si.max_tile_pipes = 12;
2403                 rdev->config.si.max_cu_per_sh = 8;
2404                 rdev->config.si.max_sh_per_se = 2;
2405                 rdev->config.si.max_backends_per_se = 4;
2406                 rdev->config.si.max_texture_channel_caches = 12;
2407                 rdev->config.si.max_gprs = 256;
2408                 rdev->config.si.max_gs_threads = 32;
2409                 rdev->config.si.max_hw_contexts = 8;
2410
2411                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2412                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2413                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2414                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2415                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2416                 break;
2417         case CHIP_PITCAIRN:
2418                 rdev->config.si.max_shader_engines = 2;
2419                 rdev->config.si.max_tile_pipes = 8;
2420                 rdev->config.si.max_cu_per_sh = 5;
2421                 rdev->config.si.max_sh_per_se = 2;
2422                 rdev->config.si.max_backends_per_se = 4;
2423                 rdev->config.si.max_texture_channel_caches = 8;
2424                 rdev->config.si.max_gprs = 256;
2425                 rdev->config.si.max_gs_threads = 32;
2426                 rdev->config.si.max_hw_contexts = 8;
2427
2428                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2429                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2430                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2431                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2432                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2433                 break;
2434         case CHIP_VERDE:
2435         default:
2436                 rdev->config.si.max_shader_engines = 1;
2437                 rdev->config.si.max_tile_pipes = 4;
2438                 rdev->config.si.max_cu_per_sh = 2;
2439                 rdev->config.si.max_sh_per_se = 2;
2440                 rdev->config.si.max_backends_per_se = 4;
2441                 rdev->config.si.max_texture_channel_caches = 4;
2442                 rdev->config.si.max_gprs = 256;
2443                 rdev->config.si.max_gs_threads = 32;
2444                 rdev->config.si.max_hw_contexts = 8;
2445
2446                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2447                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2448                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2449                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2450                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2451                 break;
2452         case CHIP_OLAND:
2453                 rdev->config.si.max_shader_engines = 1;
2454                 rdev->config.si.max_tile_pipes = 4;
2455                 rdev->config.si.max_cu_per_sh = 6;
2456                 rdev->config.si.max_sh_per_se = 1;
2457                 rdev->config.si.max_backends_per_se = 2;
2458                 rdev->config.si.max_texture_channel_caches = 4;
2459                 rdev->config.si.max_gprs = 256;
2460                 rdev->config.si.max_gs_threads = 16;
2461                 rdev->config.si.max_hw_contexts = 8;
2462
2463                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2464                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2465                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2466                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2467                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2468                 break;
2469         }
2470
2471         /* Initialize HDP */
2472         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2473                 WREG32((0x2c14 + j), 0x00000000);
2474                 WREG32((0x2c18 + j), 0x00000000);
2475                 WREG32((0x2c1c + j), 0x00000000);
2476                 WREG32((0x2c20 + j), 0x00000000);
2477                 WREG32((0x2c24 + j), 0x00000000);
2478         }
2479
2480         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2481
2482         evergreen_fix_pci_max_read_req_size(rdev);
2483
2484         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2485
2486         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2487         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2488
2489         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2490         rdev->config.si.mem_max_burst_length_bytes = 256;
2491         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2492         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2493         if (rdev->config.si.mem_row_size_in_kb > 4)
2494                 rdev->config.si.mem_row_size_in_kb = 4;
2495         /* XXX use MC settings? */
2496         rdev->config.si.shader_engine_tile_size = 32;
2497         rdev->config.si.num_gpus = 1;
2498         rdev->config.si.multi_gpu_tile_size = 64;
2499
2500         /* fix up row size */
2501         gb_addr_config &= ~ROW_SIZE_MASK;
2502         switch (rdev->config.si.mem_row_size_in_kb) {
2503         case 1:
2504         default:
2505                 gb_addr_config |= ROW_SIZE(0);
2506                 break;
2507         case 2:
2508                 gb_addr_config |= ROW_SIZE(1);
2509                 break;
2510         case 4:
2511                 gb_addr_config |= ROW_SIZE(2);
2512                 break;
2513         }
2514
2515         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2516          * not have bank info, so create a custom tiling dword.
2517          * bits 3:0   num_pipes
2518          * bits 7:4   num_banks
2519          * bits 11:8  group_size
2520          * bits 15:12 row_size
2521          */
2522         rdev->config.si.tile_config = 0;
2523         switch (rdev->config.si.num_tile_pipes) {
2524         case 1:
2525                 rdev->config.si.tile_config |= (0 << 0);
2526                 break;
2527         case 2:
2528                 rdev->config.si.tile_config |= (1 << 0);
2529                 break;
2530         case 4:
2531                 rdev->config.si.tile_config |= (2 << 0);
2532                 break;
2533         case 8:
2534         default:
2535                 /* XXX what about 12? */
2536                 rdev->config.si.tile_config |= (3 << 0);
2537                 break;
2538         }       
2539         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
2540         case 0: /* four banks */
2541                 rdev->config.si.tile_config |= 0 << 4;
2542                 break;
2543         case 1: /* eight banks */
2544                 rdev->config.si.tile_config |= 1 << 4;
2545                 break;
2546         case 2: /* sixteen banks */
2547         default:
2548                 rdev->config.si.tile_config |= 2 << 4;
2549                 break;
2550         }
2551         rdev->config.si.tile_config |=
2552                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2553         rdev->config.si.tile_config |=
2554                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2555
2556         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2557         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2558         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2559         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2560         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2561         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2562         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2563         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2564         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2565
2566         si_tiling_mode_table_init(rdev);
2567
2568         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
2569                     rdev->config.si.max_sh_per_se,
2570                     rdev->config.si.max_backends_per_se);
2571
2572         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
2573                      rdev->config.si.max_sh_per_se,
2574                      rdev->config.si.max_cu_per_sh);
2575
2576
2577         /* set HW defaults for 3D engine */
2578         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
2579                                      ROQ_IB2_START(0x2b)));
2580         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2581
2582         sx_debug_1 = RREG32(SX_DEBUG_1);
2583         WREG32(SX_DEBUG_1, sx_debug_1);
2584
2585         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2586
2587         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
2588                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
2589                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
2590                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
2591
2592         WREG32(VGT_NUM_INSTANCES, 1);
2593
2594         WREG32(CP_PERFMON_CNTL, 0);
2595
2596         WREG32(SQ_CONFIG, 0);
2597
2598         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2599                                           FORCE_EOV_MAX_REZ_CNT(255)));
2600
2601         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2602                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2603
2604         WREG32(VGT_GS_VERTEX_REUSE, 16);
2605         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2606
2607         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
2608         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
2609         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
2610         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
2611         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
2612         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
2613         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
2614         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
2615
2616         tmp = RREG32(HDP_MISC_CNTL);
2617         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2618         WREG32(HDP_MISC_CNTL, tmp);
2619
2620         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2621         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2622
2623         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2624
2625         udelay(50);
2626 }
2627
2628 /*
2629  * GPU scratch registers helpers function.
2630  */
2631 static void si_scratch_init(struct radeon_device *rdev)
2632 {
2633         int i;
2634
2635         rdev->scratch.num_reg = 7;
2636         rdev->scratch.reg_base = SCRATCH_REG0;
2637         for (i = 0; i < rdev->scratch.num_reg; i++) {
2638                 rdev->scratch.free[i] = true;
2639                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2640         }
2641 }
2642
/**
 * si_fence_ring_emit - emit a fence on the requested ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a read-cache flush over GART followed by an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence driver's
 * GPU address and raises an interrupt (INT_SEL(2)) to wake waiters.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	/* invalidate L1 texture, TC, and shader K$/I$ caches */
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	/* NOTE(review): next two dwords presumably CP_COHER_SIZE/BASE
	 * (full range) per the SURFACE_SYNC packet layout — verify vs spec */
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
2669
2670 /*
2671  * IB stuff
2672  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a gfx/compute ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Const IBs get a SWITCH_BUFFER packet first and are chained with
 * INDIRECT_BUFFER_CONST.  Regular IBs publish the post-IB read pointer
 * (via the rptr save register or the writeback page) before the
 * INDIRECT_BUFFER packet, and are followed by a GART read-cache flush
 * tagged with the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG, 4 for the IB
			 * packet, 8 for the trailing cache flush below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for WRITE_DATA, 4 + 8 as above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* NOTE(review): (1 << 8) presumably selects memory as
			 * the WRITE_DATA destination — confirm vs CP packet spec */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* vm id in bits 31:24, 0 means no VM / kernel context */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2729
2730 /*
2731  * CP.
2732  */
2733 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2734 {
2735         if (enable)
2736                 WREG32(CP_ME_CNTL, 0);
2737         else {
2738                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2739                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2740                 WREG32(SCRATCH_UMSK, 0);
2741                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2742                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2743                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2744         }
2745         udelay(50);
2746 }
2747
2748 static int si_cp_load_microcode(struct radeon_device *rdev)
2749 {
2750         const __be32 *fw_data;
2751         int i;
2752
2753         if (!rdev->me_fw || !rdev->pfp_fw)
2754                 return -EINVAL;
2755
2756         si_cp_enable(rdev, false);
2757
2758         /* PFP */
2759         fw_data = (const __be32 *)rdev->pfp_fw->data;
2760         WREG32(CP_PFP_UCODE_ADDR, 0);
2761         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2762                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2763         WREG32(CP_PFP_UCODE_ADDR, 0);
2764
2765         /* CE */
2766         fw_data = (const __be32 *)rdev->ce_fw->data;
2767         WREG32(CP_CE_UCODE_ADDR, 0);
2768         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2769                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2770         WREG32(CP_CE_UCODE_ADDR, 0);
2771
2772         /* ME */
2773         fw_data = (const __be32 *)rdev->me_fw->data;
2774         WREG32(CP_ME_RAM_WADDR, 0);
2775         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2776                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2777         WREG32(CP_ME_RAM_WADDR, 0);
2778
2779         WREG32(CP_PFP_UCODE_ADDR, 0);
2780         WREG32(CP_CE_UCODE_ADDR, 0);
2781         WREG32(CP_ME_RAM_WADDR, 0);
2782         WREG32(CP_ME_RAM_RADDR, 0);
2783         return 0;
2784 }
2785
2786 static int si_cp_start(struct radeon_device *rdev)
2787 {
2788         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2789         int r, i;
2790
2791         r = radeon_ring_lock(rdev, ring, 7 + 4);
2792         if (r) {
2793                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2794                 return r;
2795         }
2796         /* init the CP */
2797         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2798         radeon_ring_write(ring, 0x1);
2799         radeon_ring_write(ring, 0x0);
2800         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2801         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2802         radeon_ring_write(ring, 0);
2803         radeon_ring_write(ring, 0);
2804
2805         /* init the CE partitions */
2806         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2807         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2808         radeon_ring_write(ring, 0xc000);
2809         radeon_ring_write(ring, 0xe000);
2810         radeon_ring_unlock_commit(rdev, ring);
2811
2812         si_cp_enable(rdev, true);
2813
2814         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2815         if (r) {
2816                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2817                 return r;
2818         }
2819
2820         /* setup clear context state */
2821         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2822         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2823
2824         for (i = 0; i < si_default_size; i++)
2825                 radeon_ring_write(ring, si_default_state[i]);
2826
2827         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2828         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2829
2830         /* set clear context state */
2831         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2832         radeon_ring_write(ring, 0);
2833
2834         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2835         radeon_ring_write(ring, 0x00000316);
2836         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2837         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2838
2839         radeon_ring_unlock_commit(rdev, ring);
2840
2841         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2842                 ring = &rdev->ring[i];
2843                 r = radeon_ring_lock(rdev, ring, 2);
2844
2845                 /* clear the compute context state */
2846                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2847                 radeon_ring_write(ring, 0);
2848
2849                 radeon_ring_unlock_commit(rdev, ring);
2850         }
2851
2852         return 0;
2853 }
2854
2855 static void si_cp_fini(struct radeon_device *rdev)
2856 {
2857         struct radeon_ring *ring;
2858         si_cp_enable(rdev, false);
2859
2860         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2861         radeon_ring_fini(rdev, ring);
2862         radeon_scratch_free(rdev, ring->rptr_save_reg);
2863
2864         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2865         radeon_ring_fini(rdev, ring);
2866         radeon_scratch_free(rdev, ring->rptr_save_reg);
2867
2868         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2869         radeon_ring_fini(rdev, ring);
2870         radeon_scratch_free(rdev, ring->rptr_save_reg);
2871 }
2872
/**
 * si_cp_resume - reset and (re)start the CP ring buffers
 * @rdev: radeon_device pointer
 *
 * Soft resets the CP (plus the blocks that must be reset with it),
 * programs ring buffer size, read/write pointers, writeback addresses
 * and base for the gfx ring and both compute rings, starts the CP and
 * ring-tests each ring.
 *
 * Returns 0 on success; a gfx ring test failure is fatal (all rings
 * marked not ready, error returned), while a compute ring test failure
 * only marks that ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* NOTE(review): tmp is rebuilt for rings 1/2, so RB_NO_UPDATE set
	 * above when writeback is disabled is NOT applied to the compute
	 * rings — confirm this asymmetry is intended. */
	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3008
/**
 * si_gpu_check_soft_reset - scan status registers for blocks needing reset
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and translates busy or
 * request-pending bits into a RADEON_RESET_* bitmask.  An MC busy
 * indication is deliberately cleared from the mask since the MC is
 * most likely just busy rather than hung.
 *
 * Returns the mask of blocks that appear hung (0 if everything is idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3089
/**
 * si_gpu_soft_reset - soft reset the blocks selected in @reset_mask
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Halts the CP and affected DMA engines, stops the MC, translates the
 * reset mask into GRBM/SRBM soft-reset bits, pulses those bits, then
 * resumes the MC.  Status registers are printed before and after so a
 * failed reset is diagnosable from the log.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the reset mask onto GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear
	 * (reads after writes post the register access) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3214
3215 int si_asic_reset(struct radeon_device *rdev)
3216 {
3217         u32 reset_mask;
3218
3219         reset_mask = si_gpu_check_soft_reset(rdev);
3220
3221         if (reset_mask)
3222                 r600_set_bios_scratch_engine_hung(rdev, true);
3223
3224         si_gpu_soft_reset(rdev, reset_mask);
3225
3226         reset_mask = si_gpu_check_soft_reset(rdev);
3227
3228         if (!reset_mask)
3229                 r600_set_bios_scratch_engine_hung(rdev, false);
3230
3231         return 0;
3232 }
3233
3234 /**
3235  * si_gfx_is_lockup - Check if the GFX engine is locked up
3236  *
3237  * @rdev: radeon_device pointer
3238  * @ring: radeon_ring structure holding ring information
3239  *
3240  * Check if the GFX engine is locked up.
3241  * Returns true if the engine appears to be locked up, false if not.
3242  */
3243 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3244 {
3245         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3246
3247         if (!(reset_mask & (RADEON_RESET_GFX |
3248                             RADEON_RESET_COMPUTE |
3249                             RADEON_RESET_CP))) {
3250                 radeon_ring_lockup_update(ring);
3251                 return false;
3252         }
3253         /* force CP activities */
3254         radeon_ring_force_activity(rdev, ring);
3255         return radeon_ring_test_lockup(rdev, ring);
3256 }
3257
3258 /**
3259  * si_dma_is_lockup - Check if the DMA engine is locked up
3260  *
3261  * @rdev: radeon_device pointer
3262  * @ring: radeon_ring structure holding ring information
3263  *
3264  * Check if the async DMA engine is locked up.
3265  * Returns true if the engine appears to be locked up, false if not.
3266  */
3267 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3268 {
3269         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3270         u32 mask;
3271
3272         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3273                 mask = RADEON_RESET_DMA;
3274         else
3275                 mask = RADEON_RESET_DMA1;
3276
3277         if (!(reset_mask & mask)) {
3278                 radeon_ring_lockup_update(ring);
3279                 return false;
3280         }
3281         /* force ring activities */
3282         radeon_ring_force_activity(rdev, ring);
3283         return radeon_ring_test_lockup(rdev, ring);
3284 }
3285
3286 /* MC */
/**
 * si_mc_program - program the memory controller address ranges
 * @rdev: radeon_device pointer
 *
 * Clears the HDP registers, stops the MC, programs the system/VRAM
 * aperture and framebuffer location registers, then resumes the MC and
 * disables the VGA renderer so it cannot scribble over our objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in bits 31:16, start in bits 15:0, 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* NOTE(review): AGP top == bot presumably disables the AGP
	 * aperture on these PCIe parts — confirm against register spec */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
3334
3335 static void si_vram_gtt_location(struct radeon_device *rdev,
3336                                  struct radeon_mc *mc)
3337 {
3338         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3339                 /* leave room for at least 1024M GTT */
3340                 dev_warn(rdev->dev, "limiting VRAM\n");
3341                 mc->real_vram_size = 0xFFC0000000ULL;
3342                 mc->mc_vram_size = 0xFFC0000000ULL;
3343         }
3344         radeon_vram_location(rdev, &rdev->mc, 0);
3345         rdev->mc.gtt_base_align = 0;
3346         radeon_gtt_location(rdev, mc);
3347 }
3348
3349 static int si_mc_init(struct radeon_device *rdev)
3350 {
3351         u32 tmp;
3352         int chansize, numchan;
3353
3354         /* Get VRAM informations */
3355         rdev->mc.vram_is_ddr = true;
3356         tmp = RREG32(MC_ARB_RAMCFG);
3357         if (tmp & CHANSIZE_OVERRIDE) {
3358                 chansize = 16;
3359         } else if (tmp & CHANSIZE_MASK) {
3360                 chansize = 64;
3361         } else {
3362                 chansize = 32;
3363         }
3364         tmp = RREG32(MC_SHARED_CHMAP);
3365         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3366         case 0:
3367         default:
3368                 numchan = 1;
3369                 break;
3370         case 1:
3371                 numchan = 2;
3372                 break;
3373         case 2:
3374                 numchan = 4;
3375                 break;
3376         case 3:
3377                 numchan = 8;
3378                 break;
3379         case 4:
3380                 numchan = 3;
3381                 break;
3382         case 5:
3383                 numchan = 6;
3384                 break;
3385         case 6:
3386                 numchan = 10;
3387                 break;
3388         case 7:
3389                 numchan = 12;
3390                 break;
3391         case 8:
3392                 numchan = 16;
3393                 break;
3394         }
3395         rdev->mc.vram_width = numchan * chansize;
3396         /* Could aper size report 0 ? */
3397         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3398         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3399         /* size in MB on si */
3400         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3401         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3402         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3403         si_vram_gtt_location(rdev, &rdev->mc);
3404         radeon_update_bandwidth_info(rdev);
3405
3406         return 0;
3407 }
3408
/*
 * GART
 */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB via MMIO
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then invalidates the page table TLB for
 * VM context 0 (bit 0 of VM_INVALIDATE_REQUEST).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15; only context 0 here */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3420
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, configures VM context 0 to cover the GTT aperture with
 * faults redirected to the dummy page, points contexts 1-15 at the
 * GART table as a safe placeholder, enables them with full fault
 * reporting, then flushes the TLB.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: flat (depth 0) table covering the GTT range,
	 * range faults land on the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers cleared during GART init;
	 * no symbolic names visible in sid.h from here — TODO confirm. */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 have their base address registers
		 * in two separate banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 with two-level (depth 1) tables and
	 * interrupt + dummy-page redirection on every fault type */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3505
/**
 * si_pcie_gart_disable - disable the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, puts the TLB and L2 cache into a
 * pass-through configuration and unpins the GART page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: L1 TLB left disabled, unmapped accesses
	 * pass through */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (ENABLE_L2_CACHE intentionally not set) */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3524
/**
 * si_pcie_gart_fini - tear down the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART before freeing the page table backing store and
 * the GART bookkeeping; the hardware must stop referencing the table
 * before it is released.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3531
/* vm parser */
/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register offset in bytes
 *
 * Command streams submitted through a GPU VM may only write context
 * registers (offset >= 0x28000) or an explicit whitelist of config
 * registers. Returns true if the write is allowed; logs an error and
 * returns false otherwise.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs against the whitelist */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
3573
/**
 * si_vm_packet3_ce_check - validate a PACKET3 for the constant engine
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer (unused)
 * @pkt: parsed packet header
 *
 * Const IBs submitted via a VM may only contain the constant-engine
 * opcodes listed below; no register writes are possible, so no
 * per-operand checking is needed. Returns 0 if the opcode is allowed,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3595
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 for the GFX ring
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer
 * @pkt: parsed packet header (pkt->idx points at the header dword)
 *
 * Most draw/state opcodes are allowed outright. Opcodes that can
 * write registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) have their destination register operands
 * checked against si_vm_reg_valid(). Returns 0 if the packet is
 * allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 targets a register — check it
		 * (presumably per the PM4 COPY_DATA encoding) */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			/* register destination: validate the single target
			 * (one-reg-write mode) or the whole written range */
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			/* write-to-register variant */
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			/* register destination */
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole window, then every register in it */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* no auto-increment: single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* low 21 bits of command hold the
					 * byte count */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3754
/**
 * si_vm_packet3_compute_check - validate a PACKET3 for the compute rings
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: IB dword buffer
 * @pkt: parsed packet header (pkt->idx points at the header dword)
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the compute subset
 * of opcodes: no draw packets, no SET_CONFIG_REG and no CP_DMA. The
 * register-writing opcodes are checked against si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 targets a register — check it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			/* register destination: single target or whole range */
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			/* write-to-register variant */
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			/* register destination */
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3836
/**
 * si_ib_parse - validate an indirect buffer for a VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to validate
 *
 * Walks every packet in the IB. PACKET0 (raw register writes) is never
 * allowed; PACKET2 is a one-dword filler; PACKET3 opcodes are dispatched
 * to the CE checker for const IBs, else to the GFX or compute checker
 * depending on the target ring. Stops at the first invalid packet.
 *
 * Returns 0 if the whole IB is valid, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are single-dword padding */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count field + 1 payload dword */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
3888
3889 /*
3890  * vm
3891  */
/**
 * si_vm_init - initialize the VM manager parameters (SI)
 *
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 hardware VM contexts; VRAM pages need no extra base
 * offset. Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
3901
/**
 * si_vm_fini - tear down the VM manager (SI)
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Nothing to do: si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
3905
3906 /**
3907  * si_vm_set_page - update the page tables using the CP
3908  *
3909  * @rdev: radeon_device pointer
3910  * @ib: indirect buffer to fill with commands
3911  * @pe: addr of the page entry
3912  * @addr: dst addr to write into pe
3913  * @count: number of page entries to update
3914  * @incr: increase next addr by incr bytes
3915  * @flags: access flags
3916  *
3917  * Update the page tables using the CP (SI).
3918  */
3919 void si_vm_set_page(struct radeon_device *rdev,
3920                     struct radeon_ib *ib,
3921                     uint64_t pe,
3922                     uint64_t addr, unsigned count,
3923                     uint32_t incr, uint32_t flags)
3924 {
3925         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3926         uint64_t value;
3927         unsigned ndw;
3928
3929         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3930                 while (count) {
3931                         ndw = 2 + count * 2;
3932                         if (ndw > 0x3FFE)
3933                                 ndw = 0x3FFE;
3934
3935                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3936                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3937                                         WRITE_DATA_DST_SEL(1));
3938                         ib->ptr[ib->length_dw++] = pe;
3939                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3940                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3941                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
3942                                         value = radeon_vm_map_gart(rdev, addr);
3943                                         value &= 0xFFFFFFFFFFFFF000ULL;
3944                                 } else if (flags & RADEON_VM_PAGE_VALID) {
3945                                         value = addr;
3946                                 } else {
3947                                         value = 0;
3948                                 }
3949                                 addr += incr;
3950                                 value |= r600_flags;
3951                                 ib->ptr[ib->length_dw++] = value;
3952                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3953                         }
3954                 }
3955         } else {
3956                 /* DMA */
3957                 if (flags & RADEON_VM_PAGE_SYSTEM) {
3958                         while (count) {
3959                                 ndw = count * 2;
3960                                 if (ndw > 0xFFFFE)
3961                                         ndw = 0xFFFFE;
3962
3963                                 /* for non-physically contiguous pages (system) */
3964                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
3965                                 ib->ptr[ib->length_dw++] = pe;
3966                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3967                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3968                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
3969                                                 value = radeon_vm_map_gart(rdev, addr);
3970                                                 value &= 0xFFFFFFFFFFFFF000ULL;
3971                                         } else if (flags & RADEON_VM_PAGE_VALID) {
3972                                                 value = addr;
3973                                         } else {
3974                                                 value = 0;
3975                                         }
3976                                         addr += incr;
3977                                         value |= r600_flags;
3978                                         ib->ptr[ib->length_dw++] = value;
3979                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
3980                                 }
3981                         }
3982                 } else {
3983                         while (count) {
3984                                 ndw = count * 2;
3985                                 if (ndw > 0xFFFFE)
3986                                         ndw = 0xFFFFE;
3987
3988                                 if (flags & RADEON_VM_PAGE_VALID)
3989                                         value = addr;
3990                                 else
3991                                         value = 0;
3992                                 /* for physically contiguous pages (vram) */
3993                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
3994                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3995                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3996                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3997                                 ib->ptr[ib->length_dw++] = 0;
3998                                 ib->ptr[ib->length_dw++] = value; /* value */
3999                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4000                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4001                                 ib->ptr[ib->length_dw++] = 0;
4002                                 pe += ndw * 4;
4003                                 addr += (ndw / 2) * incr;
4004                                 count -= ndw / 2;
4005                         }
4006                 }
4007                 while (ib->length_dw & 0x7)
4008                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4009         }
4010 }
4011
/**
 * si_vm_flush - flush the TLB for a VM from the GFX/compute ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM whose page-directory address is being installed (may be NULL)
 *
 * Emits WRITE_DATA packets that install the VM's page directory base,
 * flush the HDP cache and invalidate the TLB for this VM context, then
 * syncs the PFP to the ME. No-op when @vm is NULL.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 have their base address registers in
	 * two separate banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4054
/**
 * si_dma_vm_flush - flush the TLB for a VM from a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: VM whose page-directory address is being installed (may be NULL)
 *
 * DMA-ring counterpart of si_vm_flush(): uses SRBM_WRITE packets to
 * install the page directory base, flush the HDP cache and invalidate
 * the TLB for this VM context. No-op when @vm is NULL.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 have their base address registers in
	 * two separate banks; 0xf << 16 presumably enables all byte
	 * lanes of the SRBM write — TODO confirm against the DMA spec */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-15 are the VM contexts0-15 (the GFX-ring flush in this
	 * file documents the same register that way) */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4080
4081 /*
4082  * RLC
4083  */
4084 void si_rlc_fini(struct radeon_device *rdev)
4085 {
4086         int r;
4087
4088         /* save restore block */
4089         if (rdev->rlc.save_restore_obj) {
4090                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4091                 if (unlikely(r != 0))
4092                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4093                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4094                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4095
4096                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4097                 rdev->rlc.save_restore_obj = NULL;
4098         }
4099
4100         /* clear state block */
4101         if (rdev->rlc.clear_state_obj) {
4102                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4103                 if (unlikely(r != 0))
4104                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4105                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4106                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4107
4108                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4109                 rdev->rlc.clear_state_obj = NULL;
4110         }
4111 }
4112
/**
 * si_rlc_init - allocate and pin the RLC buffer objects
 * @rdev: radeon_device pointer
 *
 * Allocate (if not already allocated) and pin in VRAM the
 * save/restore and clear state buffer objects used by the RLC.
 * On any failure, everything allocated so far is torn down via
 * si_rlc_fini().
 * Returns 0 on success, negative error code on failure.
 */
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	/* pin the bo and record the GPU address programmed into the RLC later */
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
4169
/**
 * si_rlc_stop - halt the RLC
 * @rdev: radeon_device pointer
 *
 * Disable the RLC by clearing its control register.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4174
/**
 * si_rlc_start - start the RLC
 * @rdev: radeon_device pointer
 *
 * Enable the RLC via its control register.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4179
/**
 * si_rlc_resume - reset the RLC and load its microcode
 * @rdev: radeon_device pointer
 *
 * Stop the RLC, program its control registers and the GPU addresses
 * of the save/restore and clear state buffers, upload the RLC
 * microcode, then start the RLC again.
 * Returns 0 on success, -EINVAL if the RLC firmware was not loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* addresses are 256-byte aligned, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the ucode one dword at a time through the indirect
	 * RLC_UCODE_ADDR/RLC_UCODE_DATA register pair; the firmware
	 * image is stored big endian */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4213
4214 static void si_enable_interrupts(struct radeon_device *rdev)
4215 {
4216         u32 ih_cntl = RREG32(IH_CNTL);
4217         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4218
4219         ih_cntl |= ENABLE_INTR;
4220         ih_rb_cntl |= IH_RB_ENABLE;
4221         WREG32(IH_CNTL, ih_cntl);
4222         WREG32(IH_RB_CNTL, ih_rb_cntl);
4223         rdev->ih.enabled = true;
4224 }
4225
/**
 * si_disable_interrupts - disable the interrupt ring buffer
 * @rdev: radeon_device pointer
 *
 * Turn off the interrupt ring buffer and the interrupt handler,
 * then reset the ring read/write pointers (SI).  Note the write
 * order is the reverse of si_enable_interrupts().
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
4241
/**
 * si_disable_interrupt_state - program all interrupt enables to disabled
 * @rdev: radeon_device pointer
 *
 * Clear every interrupt enable this driver programs: CP rings,
 * both DMA engines, GRBM, per-crtc vblank and pageflip sources,
 * DAC autodetect and the hpd pins (SI).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	/* crtcs 2-5 only exist on asics with 4 or 6 crtcs */
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

	/* keep only the polarity bits of the hpd controls; this drops
	 * the enable bits along with everything else */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
4292
/**
 * si_irq_init - init and enable the interrupt ring
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller, load the
 * RLC, set up and enable the interrupt ring buffer (SI).  The
 * individual interrupt sources are enabled later via si_irq_set().
 * Returns 0 for success, negative error code on failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4363
/**
 * si_irq_set - enable/disable interrupt sources
 * @rdev: radeon_device pointer
 *
 * Build the per-source interrupt enable values from the state
 * currently requested in rdev->irq (CP/DMA ring fences, per-crtc
 * vblank and pageflip, hpd pins), then program them all into the
 * hardware at once (SI).
 * Returns 0 for success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* read-modify-write: keep the other hpd control bits,
	 * start with the enables cleared */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is requested either for the vblank interrupt proper
	 * or to drive a pending pageflip on that crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* program everything into the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	return 0;
}
4513
4514 static inline void si_irq_ack(struct radeon_device *rdev)
4515 {
4516         u32 tmp;
4517
4518         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4519         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4520         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4521         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4522         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4523         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4524         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4525         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4526         if (rdev->num_crtc >= 4) {
4527                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4528                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4529         }
4530         if (rdev->num_crtc >= 6) {
4531                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4532                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4533         }
4534
4535         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4536                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4537         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4538                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4539         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4540                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4541         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4542                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4543         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4544                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4545         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4546                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4547
4548         if (rdev->num_crtc >= 4) {
4549                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4550                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4551                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4552                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4553                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4554                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4555                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4556                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4557                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4558                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4559                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4560                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4561         }
4562
4563         if (rdev->num_crtc >= 6) {
4564                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4565                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4566                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4567                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4568                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4569                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4570                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4571                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4572                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4573                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4574                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4575                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4576         }
4577
4578         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4579                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4580                 tmp |= DC_HPDx_INT_ACK;
4581                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4582         }
4583         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4584                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4585                 tmp |= DC_HPDx_INT_ACK;
4586                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4587         }
4588         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4589                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4590                 tmp |= DC_HPDx_INT_ACK;
4591                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4592         }
4593         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4594                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4595                 tmp |= DC_HPDx_INT_ACK;
4596                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4597         }
4598         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4599                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4600                 tmp |= DC_HPDx_INT_ACK;
4601                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4602         }
4603         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4604                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4605                 tmp |= DC_HPDx_INT_ACK;
4606                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4607         }
4608 }
4609
/**
 * si_irq_disable - disable interrupts
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw, wait briefly for any in-flight
 * interrupt to land, ack all pending sources and then clear all
 * interrupt enables (SI).
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4618
/**
 * si_irq_suspend - disable interrupt state for suspend
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (SI).
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4624
/**
 * si_irq_fini - tear down interrupt support
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring buffer (SI).
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4630
4631 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4632 {
4633         u32 wptr, tmp;
4634
4635         if (rdev->wb.enabled)
4636                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4637         else
4638                 wptr = RREG32(IH_RB_WPTR);
4639
4640         if (wptr & RB_OVERFLOW) {
4641                 /* When a ring buffer overflow happen start parsing interrupt
4642                  * from the last not overwritten vector (wptr + 16). Hopefully
4643                  * this should allow us to catchup.
4644                  */
4645                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4646                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4647                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4648                 tmp = RREG32(IH_RB_CNTL);
4649                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4650                 WREG32(IH_RB_CNTL, tmp);
4651         }
4652         return (wptr & rdev->ih.ptr_mask);
4653 }
4654
4655 /*        SI IV Ring
4656  * Each IV ring entry is 128 bits:
4657  * [7:0]    - interrupt source id
4658  * [31:8]   - reserved
4659  * [59:32]  - interrupt source data
4660  * [63:60]  - reserved
4661  * [71:64]  - RINGID
4662  * [79:72]  - VMID
4663  * [127:80] - reserved
4664  */
4665 int si_irq_process(struct radeon_device *rdev)
4666 {
4667         u32 wptr;
4668         u32 rptr;
4669         u32 src_id, src_data, ring_id;
4670         u32 ring_index;
4671         bool queue_hotplug = false;
4672
4673         if (!rdev->ih.enabled || rdev->shutdown)
4674                 return IRQ_NONE;
4675
4676         wptr = si_get_ih_wptr(rdev);
4677
4678 restart_ih:
4679         /* is somebody else already processing irqs? */
4680         if (atomic_xchg(&rdev->ih.lock, 1))
4681                 return IRQ_NONE;
4682
4683         rptr = rdev->ih.rptr;
4684         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4685
4686         /* Order reading of wptr vs. reading of IH ring data */
4687         rmb();
4688
4689         /* display interrupts */
4690         si_irq_ack(rdev);
4691
4692         while (rptr != wptr) {
4693                 /* wptr/rptr are in bytes! */
4694                 ring_index = rptr / 4;
4695                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4696                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4697                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4698
4699                 switch (src_id) {
4700                 case 1: /* D1 vblank/vline */
4701                         switch (src_data) {
4702                         case 0: /* D1 vblank */
4703                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
4704                                         if (rdev->irq.crtc_vblank_int[0]) {
4705                                                 drm_handle_vblank(rdev->ddev, 0);
4706                                                 rdev->pm.vblank_sync = true;
4707                                                 wake_up(&rdev->irq.vblank_queue);
4708                                         }
4709                                         if (atomic_read(&rdev->irq.pflip[0]))
4710                                                 radeon_crtc_handle_flip(rdev, 0);
4711                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4712                                         DRM_DEBUG("IH: D1 vblank\n");
4713                                 }
4714                                 break;
4715                         case 1: /* D1 vline */
4716                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
4717                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4718                                         DRM_DEBUG("IH: D1 vline\n");
4719                                 }
4720                                 break;
4721                         default:
4722                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4723                                 break;
4724                         }
4725                         break;
4726                 case 2: /* D2 vblank/vline */
4727                         switch (src_data) {
4728                         case 0: /* D2 vblank */
4729                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4730                                         if (rdev->irq.crtc_vblank_int[1]) {
4731                                                 drm_handle_vblank(rdev->ddev, 1);
4732                                                 rdev->pm.vblank_sync = true;
4733                                                 wake_up(&rdev->irq.vblank_queue);
4734                                         }
4735                                         if (atomic_read(&rdev->irq.pflip[1]))
4736                                                 radeon_crtc_handle_flip(rdev, 1);
4737                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4738                                         DRM_DEBUG("IH: D2 vblank\n");
4739                                 }
4740                                 break;
4741                         case 1: /* D2 vline */
4742                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4743                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4744                                         DRM_DEBUG("IH: D2 vline\n");
4745                                 }
4746                                 break;
4747                         default:
4748                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4749                                 break;
4750                         }
4751                         break;
4752                 case 3: /* D3 vblank/vline */
4753                         switch (src_data) {
4754                         case 0: /* D3 vblank */
4755                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4756                                         if (rdev->irq.crtc_vblank_int[2]) {
4757                                                 drm_handle_vblank(rdev->ddev, 2);
4758                                                 rdev->pm.vblank_sync = true;
4759                                                 wake_up(&rdev->irq.vblank_queue);
4760                                         }
4761                                         if (atomic_read(&rdev->irq.pflip[2]))
4762                                                 radeon_crtc_handle_flip(rdev, 2);
4763                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4764                                         DRM_DEBUG("IH: D3 vblank\n");
4765                                 }
4766                                 break;
4767                         case 1: /* D3 vline */
4768                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4769                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4770                                         DRM_DEBUG("IH: D3 vline\n");
4771                                 }
4772                                 break;
4773                         default:
4774                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4775                                 break;
4776                         }
4777                         break;
4778                 case 4: /* D4 vblank/vline */
4779                         switch (src_data) {
4780                         case 0: /* D4 vblank */
4781                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4782                                         if (rdev->irq.crtc_vblank_int[3]) {
4783                                                 drm_handle_vblank(rdev->ddev, 3);
4784                                                 rdev->pm.vblank_sync = true;
4785                                                 wake_up(&rdev->irq.vblank_queue);
4786                                         }
4787                                         if (atomic_read(&rdev->irq.pflip[3]))
4788                                                 radeon_crtc_handle_flip(rdev, 3);
4789                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4790                                         DRM_DEBUG("IH: D4 vblank\n");
4791                                 }
4792                                 break;
4793                         case 1: /* D4 vline */
4794                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4795                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4796                                         DRM_DEBUG("IH: D4 vline\n");
4797                                 }
4798                                 break;
4799                         default:
4800                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4801                                 break;
4802                         }
4803                         break;
4804                 case 5: /* D5 vblank/vline */
4805                         switch (src_data) {
4806                         case 0: /* D5 vblank */
4807                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4808                                         if (rdev->irq.crtc_vblank_int[4]) {
4809                                                 drm_handle_vblank(rdev->ddev, 4);
4810                                                 rdev->pm.vblank_sync = true;
4811                                                 wake_up(&rdev->irq.vblank_queue);
4812                                         }
4813                                         if (atomic_read(&rdev->irq.pflip[4]))
4814                                                 radeon_crtc_handle_flip(rdev, 4);
4815                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4816                                         DRM_DEBUG("IH: D5 vblank\n");
4817                                 }
4818                                 break;
4819                         case 1: /* D5 vline */
4820                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4821                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4822                                         DRM_DEBUG("IH: D5 vline\n");
4823                                 }
4824                                 break;
4825                         default:
4826                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4827                                 break;
4828                         }
4829                         break;
4830                 case 6: /* D6 vblank/vline */
4831                         switch (src_data) {
4832                         case 0: /* D6 vblank */
4833                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4834                                         if (rdev->irq.crtc_vblank_int[5]) {
4835                                                 drm_handle_vblank(rdev->ddev, 5);
4836                                                 rdev->pm.vblank_sync = true;
4837                                                 wake_up(&rdev->irq.vblank_queue);
4838                                         }
4839                                         if (atomic_read(&rdev->irq.pflip[5]))
4840                                                 radeon_crtc_handle_flip(rdev, 5);
4841                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4842                                         DRM_DEBUG("IH: D6 vblank\n");
4843                                 }
4844                                 break;
4845                         case 1: /* D6 vline */
4846                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4847                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4848                                         DRM_DEBUG("IH: D6 vline\n");
4849                                 }
4850                                 break;
4851                         default:
4852                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4853                                 break;
4854                         }
4855                         break;
4856                 case 42: /* HPD hotplug */
4857                         switch (src_data) {
4858                         case 0:
4859                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4860                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
4861                                         queue_hotplug = true;
4862                                         DRM_DEBUG("IH: HPD1\n");
4863                                 }
4864                                 break;
4865                         case 1:
4866                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4867                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4868                                         queue_hotplug = true;
4869                                         DRM_DEBUG("IH: HPD2\n");
4870                                 }
4871                                 break;
4872                         case 2:
4873                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4874                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4875                                         queue_hotplug = true;
4876                                         DRM_DEBUG("IH: HPD3\n");
4877                                 }
4878                                 break;
4879                         case 3:
4880                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4881                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4882                                         queue_hotplug = true;
4883                                         DRM_DEBUG("IH: HPD4\n");
4884                                 }
4885                                 break;
4886                         case 4:
4887                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4888                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4889                                         queue_hotplug = true;
4890                                         DRM_DEBUG("IH: HPD5\n");
4891                                 }
4892                                 break;
4893                         case 5:
4894                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4895                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4896                                         queue_hotplug = true;
4897                                         DRM_DEBUG("IH: HPD6\n");
4898                                 }
4899                                 break;
4900                         default:
4901                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4902                                 break;
4903                         }
4904                         break;
4905                 case 146:
4906                 case 147:
4907                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4908                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4909                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4910                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4911                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4912                         /* reset addr and status */
4913                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4914                         break;
4915                 case 176: /* RINGID0 CP_INT */
4916                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4917                         break;
4918                 case 177: /* RINGID1 CP_INT */
4919                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4920                         break;
4921                 case 178: /* RINGID2 CP_INT */
4922                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4923                         break;
4924                 case 181: /* CP EOP event */
4925                         DRM_DEBUG("IH: CP EOP\n");
4926                         switch (ring_id) {
4927                         case 0:
4928                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4929                                 break;
4930                         case 1:
4931                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4932                                 break;
4933                         case 2:
4934                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4935                                 break;
4936                         }
4937                         break;
4938                 case 224: /* DMA trap event */
4939                         DRM_DEBUG("IH: DMA trap\n");
4940                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4941                         break;
4942                 case 233: /* GUI IDLE */
4943                         DRM_DEBUG("IH: GUI idle\n");
4944                         break;
4945                 case 244: /* DMA trap event */
4946                         DRM_DEBUG("IH: DMA1 trap\n");
4947                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4948                         break;
4949                 default:
4950                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4951                         break;
4952                 }
4953
4954                 /* wptr/rptr are in bytes! */
4955                 rptr += 16;
4956                 rptr &= rdev->ih.ptr_mask;
4957         }
4958         if (queue_hotplug)
4959                 schedule_work(&rdev->hotplug_work);
4960         rdev->ih.rptr = rptr;
4961         WREG32(IH_RB_RPTR, rdev->ih.rptr);
4962         atomic_set(&rdev->ih.lock, 0);
4963
4964         /* make sure wptr hasn't changed while processing */
4965         wptr = si_get_ih_wptr(rdev);
4966         if (wptr != rptr)
4967                 goto restart_ih;
4968
4969         return IRQ_HANDLED;
4970 }
4971
4972 /**
4973  * si_copy_dma - copy pages using the DMA engine
4974  *
4975  * @rdev: radeon_device pointer
4976  * @src_offset: src GPU address
4977  * @dst_offset: dst GPU address
4978  * @num_gpu_pages: number of GPU pages to xfer
4979  * @fence: radeon fence object
4980  *
4981  * Copy GPU paging using the DMA engine (SI).
4982  * Used by the radeon ttm implementation to move pages if
4983  * registered as the asic copy callback.
4984  */
4985 int si_copy_dma(struct radeon_device *rdev,
4986                 uint64_t src_offset, uint64_t dst_offset,
4987                 unsigned num_gpu_pages,
4988                 struct radeon_fence **fence)
4989 {
4990         struct radeon_semaphore *sem = NULL;
4991         int ring_index = rdev->asic->copy.dma_ring_index;
4992         struct radeon_ring *ring = &rdev->ring[ring_index];
4993         u32 size_in_bytes, cur_size_in_bytes;
4994         int i, num_loops;
4995         int r = 0;
4996
4997         r = radeon_semaphore_create(rdev, &sem);
4998         if (r) {
4999                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5000                 return r;
5001         }
5002
5003         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5004         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5005         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5006         if (r) {
5007                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5008                 radeon_semaphore_free(rdev, &sem, NULL);
5009                 return r;
5010         }
5011
5012         if (radeon_fence_need_sync(*fence, ring->idx)) {
5013                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5014                                             ring->idx);
5015                 radeon_fence_note_sync(*fence, ring->idx);
5016         } else {
5017                 radeon_semaphore_free(rdev, &sem, NULL);
5018         }
5019
5020         for (i = 0; i < num_loops; i++) {
5021                 cur_size_in_bytes = size_in_bytes;
5022                 if (cur_size_in_bytes > 0xFFFFF)
5023                         cur_size_in_bytes = 0xFFFFF;
5024                 size_in_bytes -= cur_size_in_bytes;
5025                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5026                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5027                 radeon_ring_write(ring, src_offset & 0xffffffff);
5028                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5029                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5030                 src_offset += cur_size_in_bytes;
5031                 dst_offset += cur_size_in_bytes;
5032         }
5033
5034         r = radeon_fence_emit(rdev, fence, ring->idx);
5035         if (r) {
5036                 radeon_ring_unlock_undo(rdev, ring);
5037                 return r;
5038         }
5039
5040         radeon_ring_unlock_commit(rdev, ring);
5041         radeon_semaphore_free(rdev, &sem, *fence);
5042
5043         return r;
5044 }
5045
5046 /*
5047  * startup/shutdown callbacks
5048  */
/*
 * si_startup - bring the ASIC to a fully functional state
 *
 * Loads firmware, programs the memory controller, enables the GART,
 * starts the fence driver, IRQs, all CP/DMA/UVD rings, the IB pool and
 * the VM manager.  The ordering below is deliberate: each step depends
 * on the ones before it.  Called from si_init() and si_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* fetch the ucode images from userspace if not already cached */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* MC ucode must be loaded before touching memory/clock setup */
	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the MC, then bring up the page tables and core engines */
	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring that will be used:
	 * three CP rings (gfx + two compute) and two DMA engines.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: failure here just disables the UVD ring below
	 * rather than failing the whole startup.
	 */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the ring buffers; rptr/wptr registers differ per ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	/* rings are set up; load CP ucode and start the engines */
	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5214
5215 int si_resume(struct radeon_device *rdev)
5216 {
5217         int r;
5218
5219         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5220          * posting will perform necessary task to bring back GPU into good
5221          * shape.
5222          */
5223         /* post card */
5224         atom_asic_init(rdev->mode_info.atom_context);
5225
5226         /* init golden registers */
5227         si_init_golden_registers(rdev);
5228
5229         rdev->accel_working = true;
5230         r = si_startup(rdev);
5231         if (r) {
5232                 DRM_ERROR("si startup failed on resume\n");
5233                 rdev->accel_working = false;
5234                 return r;
5235         }
5236
5237         return r;
5238
5239 }
5240
/*
 * si_suspend - quiesce the ASIC for suspend
 *
 * Tears down in dependency order: stop users of the engines (VM manager),
 * halt the CP, DMA and UVD engines, then disable interrupts, writeback
 * and finally the GART once nothing can issue memory traffic anymore.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_rbc_stop(rdev);
	radeon_uvd_suspend(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5253
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init() does little more
 * than call asic-specific functions.  This should also allow us to
 * remove a bunch of callback functions like vram_info.
 */
5260 int si_init(struct radeon_device *rdev)
5261 {
5262         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5263         int r;
5264
5265         /* Read BIOS */
5266         if (!radeon_get_bios(rdev)) {
5267                 if (ASIC_IS_AVIVO(rdev))
5268                         return -EINVAL;
5269         }
5270         /* Must be an ATOMBIOS */
5271         if (!rdev->is_atom_bios) {
5272                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5273                 return -EINVAL;
5274         }
5275         r = radeon_atombios_init(rdev);
5276         if (r)
5277                 return r;
5278
5279         /* Post card if necessary */
5280         if (!radeon_card_posted(rdev)) {
5281                 if (!rdev->bios) {
5282                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5283                         return -EINVAL;
5284                 }
5285                 DRM_INFO("GPU not posted. posting now...\n");
5286                 atom_asic_init(rdev->mode_info.atom_context);
5287         }
5288         /* init golden registers */
5289         si_init_golden_registers(rdev);
5290         /* Initialize scratch registers */
5291         si_scratch_init(rdev);
5292         /* Initialize surface registers */
5293         radeon_surface_init(rdev);
5294         /* Initialize clocks */
5295         radeon_get_clock_info(rdev->ddev);
5296
5297         /* Fence driver */
5298         r = radeon_fence_driver_init(rdev);
5299         if (r)
5300                 return r;
5301
5302         /* initialize memory controller */
5303         r = si_mc_init(rdev);
5304         if (r)
5305                 return r;
5306         /* Memory manager */
5307         r = radeon_bo_init(rdev);
5308         if (r)
5309                 return r;
5310
5311         r = radeon_irq_kms_init(rdev);
5312         if (r)
5313                 return r;
5314
5315         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5316         ring->ring_obj = NULL;
5317         r600_ring_init(rdev, ring, 1024 * 1024);
5318
5319         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5320         ring->ring_obj = NULL;
5321         r600_ring_init(rdev, ring, 1024 * 1024);
5322
5323         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5324         ring->ring_obj = NULL;
5325         r600_ring_init(rdev, ring, 1024 * 1024);
5326
5327         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5328         ring->ring_obj = NULL;
5329         r600_ring_init(rdev, ring, 64 * 1024);
5330
5331         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5332         ring->ring_obj = NULL;
5333         r600_ring_init(rdev, ring, 64 * 1024);
5334
5335         r = radeon_uvd_init(rdev);
5336         if (!r) {
5337                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5338                 ring->ring_obj = NULL;
5339                 r600_ring_init(rdev, ring, 4096);
5340         }
5341
5342         rdev->ih.ring_obj = NULL;
5343         r600_ih_ring_init(rdev, 64 * 1024);
5344
5345         r = r600_pcie_gart_init(rdev);
5346         if (r)
5347                 return r;
5348
5349         rdev->accel_working = true;
5350         r = si_startup(rdev);
5351         if (r) {
5352                 dev_err(rdev->dev, "disabling GPU acceleration\n");
5353                 si_cp_fini(rdev);
5354                 cayman_dma_fini(rdev);
5355                 si_irq_fini(rdev);
5356                 si_rlc_fini(rdev);
5357                 radeon_wb_fini(rdev);
5358                 radeon_ib_pool_fini(rdev);
5359                 radeon_vm_manager_fini(rdev);
5360                 radeon_irq_kms_fini(rdev);
5361                 si_pcie_gart_fini(rdev);
5362                 rdev->accel_working = false;
5363         }
5364
5365         /* Don't start up if the MC ucode is missing.
5366          * The default clocks and voltages before the MC ucode
5367          * is loaded are not suffient for advanced operations.
5368          */
5369         if (!rdev->mc_fw) {
5370                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5371                 return -EINVAL;
5372         }
5373
5374         return 0;
5375 }
5376
/*
 * si_fini - full driver/hw teardown at unload
 *
 * Releases everything si_init()/si_startup() created, roughly in
 * reverse order of creation: engines first, then the memory, IRQ and
 * fence infrastructure, and finally the atombios state and BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* the BIOS copy was kmalloc'ed in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
5397
5398 /**
5399  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5400  *
5401  * @rdev: radeon_device pointer
5402  *
5403  * Fetches a GPU clock counter snapshot (SI).
5404  * Returns the 64 bit clock counter snapshot.
5405  */
5406 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5407 {
5408         uint64_t clock;
5409
5410         mutex_lock(&rdev->gpu_clock_mutex);
5411         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5412         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5413                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5414         mutex_unlock(&rdev->gpu_clock_mutex);
5415         return clock;
5416 }
5417
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 puts the PLL to sleep)
 * @dclk: requested UVD decoder clock (0 puts the PLL to sleep)
 *
 * Switches VCLK/DCLK to bypass, reprograms the UPLL dividers for the
 * requested clocks and switches back to PLL output.  The sequence of
 * register writes below is order-sensitive; do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* ask the SMC/PLL control to latch the intermediate state */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): ISPARE9 appears to select a VCO range based on
	 * fb_div magnitude — confirm against the UPLL register docs.
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}