1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68
69 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
70 MODULE_FIRMWARE("radeon/VERDE_me.bin");
71 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
72 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
74 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
76
77 MODULE_FIRMWARE("radeon/verde_pfp.bin");
78 MODULE_FIRMWARE("radeon/verde_me.bin");
79 MODULE_FIRMWARE("radeon/verde_ce.bin");
80 MODULE_FIRMWARE("radeon/verde_mc.bin");
81 MODULE_FIRMWARE("radeon/verde_rlc.bin");
82 MODULE_FIRMWARE("radeon/verde_smc.bin");
83
84 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
85 MODULE_FIRMWARE("radeon/OLAND_me.bin");
86 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
87 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
88 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
89 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
91
92 MODULE_FIRMWARE("radeon/oland_pfp.bin");
93 MODULE_FIRMWARE("radeon/oland_me.bin");
94 MODULE_FIRMWARE("radeon/oland_ce.bin");
95 MODULE_FIRMWARE("radeon/oland_mc.bin");
96 MODULE_FIRMWARE("radeon/oland_rlc.bin");
97 MODULE_FIRMWARE("radeon/oland_smc.bin");
98
99 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
100 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
101 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
102 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
106
107 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
108 MODULE_FIRMWARE("radeon/hainan_me.bin");
109 MODULE_FIRMWARE("radeon/hainan_ce.bin");
110 MODULE_FIRMWARE("radeon/hainan_mc.bin");
111 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
112 MODULE_FIRMWARE("radeon/hainan_smc.bin");
113
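/*
 * Two firmware naming schemes are declared above: the UPPERCASE names
 * (e.g. TAHITI_pfp.bin) are the legacy ucode images, the lowercase names
 * (e.g. tahiti_pfp.bin) the newer linux-firmware releases.  As far as I
 * can tell, si_init_microcode() (further down in this file) requests the
 * new names first and falls back to the legacy ones if they are absent.
 */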
114 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
115 static void si_pcie_gen3_enable(struct radeon_device *rdev);
116 static void si_program_aspm(struct radeon_device *rdev);
117 extern void sumo_rlc_fini(struct radeon_device *rdev);
118 extern int sumo_rlc_init(struct radeon_device *rdev);
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
122 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
124 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
125 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
126 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
127 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
128                                          bool enable);
129 static void si_init_pg(struct radeon_device *rdev);
130 static void si_init_cg(struct radeon_device *rdev);
131 static void si_fini_pg(struct radeon_device *rdev);
132 static void si_fini_cg(struct radeon_device *rdev);
133 static void si_rlc_stop(struct radeon_device *rdev);
134
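/*
 * RLC save/restore register list, consumed when the RLC save/restore
 * buffer is set up (sumo_rlc_init()).  Reading the encoding from the
 * entries themselves: each pair is an encoded register descriptor
 * followed by a default value, where the low 16 bits hold the register's
 * dword offset (hence the ">> 2" on the byte offsets) and the high
 * 16 bits appear to select the shader-engine/broadcast window used when
 * the register is saved and restored.
 */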
135 static const u32 verde_rlc_save_restore_register_list[] =
136 {
137         (0x8000 << 16) | (0x98f4 >> 2),
138         0x00000000,
139         (0x8040 << 16) | (0x98f4 >> 2),
140         0x00000000,
141         (0x8000 << 16) | (0xe80 >> 2),
142         0x00000000,
143         (0x8040 << 16) | (0xe80 >> 2),
144         0x00000000,
145         (0x8000 << 16) | (0x89bc >> 2),
146         0x00000000,
147         (0x8040 << 16) | (0x89bc >> 2),
148         0x00000000,
149         (0x8000 << 16) | (0x8c1c >> 2),
150         0x00000000,
151         (0x8040 << 16) | (0x8c1c >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x98f0 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0xe7c >> 2),
156         0x00000000,
157         (0x8000 << 16) | (0x9148 >> 2),
158         0x00000000,
159         (0x8040 << 16) | (0x9148 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9150 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x897c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x8d8c >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0xac54 >> 2),
168         0x00000000,
169         0x3,
170         (0x9c00 << 16) | (0x98f8 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9910 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9914 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9918 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x991c >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x9920 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9924 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9928 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x992c >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x9930 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x9934 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x9938 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x993c >> 2),
195         0x00000000,
196         (0x9c00 << 16) | (0x9940 >> 2),
197         0x00000000,
198         (0x9c00 << 16) | (0x9944 >> 2),
199         0x00000000,
200         (0x9c00 << 16) | (0x9948 >> 2),
201         0x00000000,
202         (0x9c00 << 16) | (0x994c >> 2),
203         0x00000000,
204         (0x9c00 << 16) | (0x9950 >> 2),
205         0x00000000,
206         (0x9c00 << 16) | (0x9954 >> 2),
207         0x00000000,
208         (0x9c00 << 16) | (0x9958 >> 2),
209         0x00000000,
210         (0x9c00 << 16) | (0x995c >> 2),
211         0x00000000,
212         (0x9c00 << 16) | (0x9960 >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9964 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9968 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x996c >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9970 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9974 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9978 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x997c >> 2),
227         0x00000000,
228         (0x9c00 << 16) | (0x9980 >> 2),
229         0x00000000,
230         (0x9c00 << 16) | (0x9984 >> 2),
231         0x00000000,
232         (0x9c00 << 16) | (0x9988 >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x998c >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x8c00 >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x8c14 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8c04 >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8c08 >> 2),
243         0x00000000,
244         (0x8000 << 16) | (0x9b7c >> 2),
245         0x00000000,
246         (0x8040 << 16) | (0x9b7c >> 2),
247         0x00000000,
248         (0x8000 << 16) | (0xe84 >> 2),
249         0x00000000,
250         (0x8040 << 16) | (0xe84 >> 2),
251         0x00000000,
252         (0x8000 << 16) | (0x89c0 >> 2),
253         0x00000000,
254         (0x8040 << 16) | (0x89c0 >> 2),
255         0x00000000,
256         (0x8000 << 16) | (0x914c >> 2),
257         0x00000000,
258         (0x8040 << 16) | (0x914c >> 2),
259         0x00000000,
260         (0x8000 << 16) | (0x8c20 >> 2),
261         0x00000000,
262         (0x8040 << 16) | (0x8c20 >> 2),
263         0x00000000,
264         (0x8000 << 16) | (0x9354 >> 2),
265         0x00000000,
266         (0x8040 << 16) | (0x9354 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x9060 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x9364 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x9100 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x913c >> 2),
275         0x00000000,
276         (0x8000 << 16) | (0x90e0 >> 2),
277         0x00000000,
278         (0x8000 << 16) | (0x90e4 >> 2),
279         0x00000000,
280         (0x8000 << 16) | (0x90e8 >> 2),
281         0x00000000,
282         (0x8040 << 16) | (0x90e0 >> 2),
283         0x00000000,
284         (0x8040 << 16) | (0x90e4 >> 2),
285         0x00000000,
286         (0x8040 << 16) | (0x90e8 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x8bcc >> 2),
289         0x00000000,
290         (0x9c00 << 16) | (0x8b24 >> 2),
291         0x00000000,
292         (0x9c00 << 16) | (0x88c4 >> 2),
293         0x00000000,
294         (0x9c00 << 16) | (0x8e50 >> 2),
295         0x00000000,
296         (0x9c00 << 16) | (0x8c0c >> 2),
297         0x00000000,
298         (0x9c00 << 16) | (0x8e58 >> 2),
299         0x00000000,
300         (0x9c00 << 16) | (0x8e5c >> 2),
301         0x00000000,
302         (0x9c00 << 16) | (0x9508 >> 2),
303         0x00000000,
304         (0x9c00 << 16) | (0x950c >> 2),
305         0x00000000,
306         (0x9c00 << 16) | (0x9494 >> 2),
307         0x00000000,
308         (0x9c00 << 16) | (0xac0c >> 2),
309         0x00000000,
310         (0x9c00 << 16) | (0xac10 >> 2),
311         0x00000000,
312         (0x9c00 << 16) | (0xac14 >> 2),
313         0x00000000,
314         (0x9c00 << 16) | (0xae00 >> 2),
315         0x00000000,
316         (0x9c00 << 16) | (0xac08 >> 2),
317         0x00000000,
318         (0x9c00 << 16) | (0x88d4 >> 2),
319         0x00000000,
320         (0x9c00 << 16) | (0x88c8 >> 2),
321         0x00000000,
322         (0x9c00 << 16) | (0x88cc >> 2),
323         0x00000000,
324         (0x9c00 << 16) | (0x89b0 >> 2),
325         0x00000000,
326         (0x9c00 << 16) | (0x8b10 >> 2),
327         0x00000000,
328         (0x9c00 << 16) | (0x8a14 >> 2),
329         0x00000000,
330         (0x9c00 << 16) | (0x9830 >> 2),
331         0x00000000,
332         (0x9c00 << 16) | (0x9834 >> 2),
333         0x00000000,
334         (0x9c00 << 16) | (0x9838 >> 2),
335         0x00000000,
336         (0x9c00 << 16) | (0x9a10 >> 2),
337         0x00000000,
338         (0x8000 << 16) | (0x9870 >> 2),
339         0x00000000,
340         (0x8000 << 16) | (0x9874 >> 2),
341         0x00000000,
342         (0x8001 << 16) | (0x9870 >> 2),
343         0x00000000,
344         (0x8001 << 16) | (0x9874 >> 2),
345         0x00000000,
346         (0x8040 << 16) | (0x9870 >> 2),
347         0x00000000,
348         (0x8040 << 16) | (0x9874 >> 2),
349         0x00000000,
350         (0x8041 << 16) | (0x9870 >> 2),
351         0x00000000,
352         (0x8041 << 16) | (0x9874 >> 2),
353         0x00000000,
354         0x00000000
355 };
356
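/*
 * The "golden register" tables below are triplets of
 * { register byte offset, mask of bits to replace, new value } consumed
 * by radeon_program_register_sequence(); see the sketch after
 * si_init_golden_registers() for how the mask is applied.
 */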
357 static const u32 tahiti_golden_rlc_registers[] =
358 {
359         0xc424, 0xffffffff, 0x00601005,
360         0xc47c, 0xffffffff, 0x10104040,
361         0xc488, 0xffffffff, 0x0100000a,
362         0xc314, 0xffffffff, 0x00000800,
363         0xc30c, 0xffffffff, 0x800000f4,
364         0xf4a8, 0xffffffff, 0x00000000
365 };
366
367 static const u32 tahiti_golden_registers[] =
368 {
369         0x9a10, 0x00010000, 0x00018208,
370         0x9830, 0xffffffff, 0x00000000,
371         0x9834, 0xf00fffff, 0x00000400,
372         0x9838, 0x0002021c, 0x00020200,
373         0xc78, 0x00000080, 0x00000000,
374         0xd030, 0x000300c0, 0x00800040,
375         0xd830, 0x000300c0, 0x00800040,
376         0x5bb0, 0x000000f0, 0x00000070,
377         0x5bc0, 0x00200000, 0x50100000,
378         0x7030, 0x31000311, 0x00000011,
379         0x277c, 0x00000003, 0x000007ff,
380         0x240c, 0x000007ff, 0x00000000,
381         0x8a14, 0xf000001f, 0x00000007,
382         0x8b24, 0xffffffff, 0x00ffffff,
383         0x8b10, 0x0000ff0f, 0x00000000,
384         0x28a4c, 0x07ffffff, 0x4e000000,
385         0x28350, 0x3f3f3fff, 0x2a00126a,
386         0x30, 0x000000ff, 0x0040,
387         0x34, 0x00000040, 0x00004040,
388         0x9100, 0x07ffffff, 0x03000000,
389         0x8e88, 0x01ff1f3f, 0x00000000,
390         0x8e84, 0x01ff1f3f, 0x00000000,
391         0x9060, 0x0000007f, 0x00000020,
392         0x9508, 0x00010000, 0x00010000,
393         0xac14, 0x00000200, 0x000002fb,
394         0xac10, 0xffffffff, 0x0000543b,
395         0xac0c, 0xffffffff, 0xa9210876,
396         0x88d0, 0xffffffff, 0x000fff40,
397         0x88d4, 0x0000001f, 0x00000010,
398         0x1410, 0x20000000, 0x20fffed8,
399         0x15c0, 0x000c0fc0, 0x000c0400
400 };
401
402 static const u32 tahiti_golden_registers2[] =
403 {
404         0xc64, 0x00000001, 0x00000001
405 };
406
407 static const u32 pitcairn_golden_rlc_registers[] =
408 {
409         0xc424, 0xffffffff, 0x00601004,
410         0xc47c, 0xffffffff, 0x10102020,
411         0xc488, 0xffffffff, 0x01000020,
412         0xc314, 0xffffffff, 0x00000800,
413         0xc30c, 0xffffffff, 0x800000a4
414 };
415
416 static const u32 pitcairn_golden_registers[] =
417 {
418         0x9a10, 0x00010000, 0x00018208,
419         0x9830, 0xffffffff, 0x00000000,
420         0x9834, 0xf00fffff, 0x00000400,
421         0x9838, 0x0002021c, 0x00020200,
422         0xc78, 0x00000080, 0x00000000,
423         0xd030, 0x000300c0, 0x00800040,
424         0xd830, 0x000300c0, 0x00800040,
425         0x5bb0, 0x000000f0, 0x00000070,
426         0x5bc0, 0x00200000, 0x50100000,
427         0x7030, 0x31000311, 0x00000011,
428         0x2ae4, 0x00073ffe, 0x000022a2,
429         0x240c, 0x000007ff, 0x00000000,
430         0x8a14, 0xf000001f, 0x00000007,
431         0x8b24, 0xffffffff, 0x00ffffff,
432         0x8b10, 0x0000ff0f, 0x00000000,
433         0x28a4c, 0x07ffffff, 0x4e000000,
434         0x28350, 0x3f3f3fff, 0x2a00126a,
435         0x30, 0x000000ff, 0x0040,
436         0x34, 0x00000040, 0x00004040,
437         0x9100, 0x07ffffff, 0x03000000,
438         0x9060, 0x0000007f, 0x00000020,
439         0x9508, 0x00010000, 0x00010000,
440         0xac14, 0x000003ff, 0x000000f7,
441         0xac10, 0xffffffff, 0x00000000,
442         0xac0c, 0xffffffff, 0x32761054,
443         0x88d4, 0x0000001f, 0x00000010,
444         0x15c0, 0x000c0fc0, 0x000c0400
445 };
446
447 static const u32 verde_golden_rlc_registers[] =
448 {
449         0xc424, 0xffffffff, 0x033f1005,
450         0xc47c, 0xffffffff, 0x10808020,
451         0xc488, 0xffffffff, 0x00800008,
452         0xc314, 0xffffffff, 0x00001000,
453         0xc30c, 0xffffffff, 0x80010014
454 };
455
456 static const u32 verde_golden_registers[] =
457 {
458         0x9a10, 0x00010000, 0x00018208,
459         0x9830, 0xffffffff, 0x00000000,
460         0x9834, 0xf00fffff, 0x00000400,
461         0x9838, 0x0002021c, 0x00020200,
462         0xc78, 0x00000080, 0x00000000,
463         0xd030, 0x000300c0, 0x00800040,
464         0xd030, 0x000300c0, 0x00800040,
465         0xd830, 0x000300c0, 0x00800040,
466         0xd830, 0x000300c0, 0x00800040,
467         0x5bb0, 0x000000f0, 0x00000070,
468         0x5bc0, 0x00200000, 0x50100000,
469         0x7030, 0x31000311, 0x00000011,
470         0x2ae4, 0x00073ffe, 0x000022a2,
471         0x2ae4, 0x00073ffe, 0x000022a2,
472         0x2ae4, 0x00073ffe, 0x000022a2,
473         0x240c, 0x000007ff, 0x00000000,
474         0x240c, 0x000007ff, 0x00000000,
475         0x240c, 0x000007ff, 0x00000000,
476         0x8a14, 0xf000001f, 0x00000007,
477         0x8a14, 0xf000001f, 0x00000007,
478         0x8a14, 0xf000001f, 0x00000007,
479         0x8b24, 0xffffffff, 0x00ffffff,
480         0x8b10, 0x0000ff0f, 0x00000000,
481         0x28a4c, 0x07ffffff, 0x4e000000,
482         0x28350, 0x3f3f3fff, 0x0000124a,
483         0x28350, 0x3f3f3fff, 0x0000124a,
484         0x28350, 0x3f3f3fff, 0x0000124a,
485         0x30, 0x000000ff, 0x0040,
486         0x34, 0x00000040, 0x00004040,
487         0x9100, 0x07ffffff, 0x03000000,
488         0x9100, 0x07ffffff, 0x03000000,
489         0x8e88, 0x01ff1f3f, 0x00000000,
490         0x8e88, 0x01ff1f3f, 0x00000000,
491         0x8e88, 0x01ff1f3f, 0x00000000,
492         0x8e84, 0x01ff1f3f, 0x00000000,
493         0x8e84, 0x01ff1f3f, 0x00000000,
494         0x8e84, 0x01ff1f3f, 0x00000000,
495         0x9060, 0x0000007f, 0x00000020,
496         0x9508, 0x00010000, 0x00010000,
497         0xac14, 0x000003ff, 0x00000003,
498         0xac14, 0x000003ff, 0x00000003,
499         0xac14, 0x000003ff, 0x00000003,
500         0xac10, 0xffffffff, 0x00000000,
501         0xac10, 0xffffffff, 0x00000000,
502         0xac10, 0xffffffff, 0x00000000,
503         0xac0c, 0xffffffff, 0x00001032,
504         0xac0c, 0xffffffff, 0x00001032,
505         0xac0c, 0xffffffff, 0x00001032,
506         0x88d4, 0x0000001f, 0x00000010,
507         0x88d4, 0x0000001f, 0x00000010,
508         0x88d4, 0x0000001f, 0x00000010,
509         0x15c0, 0x000c0fc0, 0x000c0400
510 };
511
512 static const u32 oland_golden_rlc_registers[] =
513 {
514         0xc424, 0xffffffff, 0x00601005,
515         0xc47c, 0xffffffff, 0x10104040,
516         0xc488, 0xffffffff, 0x0100000a,
517         0xc314, 0xffffffff, 0x00000800,
518         0xc30c, 0xffffffff, 0x800000f4
519 };
520
521 static const u32 oland_golden_registers[] =
522 {
523         0x9a10, 0x00010000, 0x00018208,
524         0x9830, 0xffffffff, 0x00000000,
525         0x9834, 0xf00fffff, 0x00000400,
526         0x9838, 0x0002021c, 0x00020200,
527         0xc78, 0x00000080, 0x00000000,
528         0xd030, 0x000300c0, 0x00800040,
529         0xd830, 0x000300c0, 0x00800040,
530         0x5bb0, 0x000000f0, 0x00000070,
531         0x5bc0, 0x00200000, 0x50100000,
532         0x7030, 0x31000311, 0x00000011,
533         0x2ae4, 0x00073ffe, 0x000022a2,
534         0x240c, 0x000007ff, 0x00000000,
535         0x8a14, 0xf000001f, 0x00000007,
536         0x8b24, 0xffffffff, 0x00ffffff,
537         0x8b10, 0x0000ff0f, 0x00000000,
538         0x28a4c, 0x07ffffff, 0x4e000000,
539         0x28350, 0x3f3f3fff, 0x00000082,
540         0x30, 0x000000ff, 0x0040,
541         0x34, 0x00000040, 0x00004040,
542         0x9100, 0x07ffffff, 0x03000000,
543         0x9060, 0x0000007f, 0x00000020,
544         0x9508, 0x00010000, 0x00010000,
545         0xac14, 0x000003ff, 0x000000f3,
546         0xac10, 0xffffffff, 0x00000000,
547         0xac0c, 0xffffffff, 0x00003210,
548         0x88d4, 0x0000001f, 0x00000010,
549         0x15c0, 0x000c0fc0, 0x000c0400
550 };
551
552 static const u32 hainan_golden_registers[] =
553 {
554         0x9a10, 0x00010000, 0x00018208,
555         0x9830, 0xffffffff, 0x00000000,
556         0x9834, 0xf00fffff, 0x00000400,
557         0x9838, 0x0002021c, 0x00020200,
558         0xd0c0, 0xff000fff, 0x00000100,
559         0xd030, 0x000300c0, 0x00800040,
560         0xd8c0, 0xff000fff, 0x00000100,
561         0xd830, 0x000300c0, 0x00800040,
562         0x2ae4, 0x00073ffe, 0x000022a2,
563         0x240c, 0x000007ff, 0x00000000,
564         0x8a14, 0xf000001f, 0x00000007,
565         0x8b24, 0xffffffff, 0x00ffffff,
566         0x8b10, 0x0000ff0f, 0x00000000,
567         0x28a4c, 0x07ffffff, 0x4e000000,
568         0x28350, 0x3f3f3fff, 0x00000000,
569         0x30, 0x000000ff, 0x0040,
570         0x34, 0x00000040, 0x00004040,
571         0x9100, 0x03e00000, 0x03600000,
572         0x9060, 0x0000007f, 0x00000020,
573         0x9508, 0x00010000, 0x00010000,
574         0xac14, 0x000003ff, 0x000000f1,
575         0xac10, 0xffffffff, 0x00000000,
576         0xac0c, 0xffffffff, 0x00003210,
577         0x88d4, 0x0000001f, 0x00000010,
578         0x15c0, 0x000c0fc0, 0x000c0400
579 };
580
581 static const u32 hainan_golden_registers2[] =
582 {
583         0x98f8, 0xffffffff, 0x02010001
584 };
585
586 static const u32 tahiti_mgcg_cgcg_init[] =
587 {
588         0xc400, 0xffffffff, 0xfffffffc,
589         0x802c, 0xffffffff, 0xe0000000,
590         0x9a60, 0xffffffff, 0x00000100,
591         0x92a4, 0xffffffff, 0x00000100,
592         0xc164, 0xffffffff, 0x00000100,
593         0x9774, 0xffffffff, 0x00000100,
594         0x8984, 0xffffffff, 0x06000100,
595         0x8a18, 0xffffffff, 0x00000100,
596         0x92a0, 0xffffffff, 0x00000100,
597         0xc380, 0xffffffff, 0x00000100,
598         0x8b28, 0xffffffff, 0x00000100,
599         0x9144, 0xffffffff, 0x00000100,
600         0x8d88, 0xffffffff, 0x00000100,
601         0x8d8c, 0xffffffff, 0x00000100,
602         0x9030, 0xffffffff, 0x00000100,
603         0x9034, 0xffffffff, 0x00000100,
604         0x9038, 0xffffffff, 0x00000100,
605         0x903c, 0xffffffff, 0x00000100,
606         0xad80, 0xffffffff, 0x00000100,
607         0xac54, 0xffffffff, 0x00000100,
608         0x897c, 0xffffffff, 0x06000100,
609         0x9868, 0xffffffff, 0x00000100,
610         0x9510, 0xffffffff, 0x00000100,
611         0xaf04, 0xffffffff, 0x00000100,
612         0xae04, 0xffffffff, 0x00000100,
613         0x949c, 0xffffffff, 0x00000100,
614         0x802c, 0xffffffff, 0xe0000000,
615         0x9160, 0xffffffff, 0x00010000,
616         0x9164, 0xffffffff, 0x00030002,
617         0x9168, 0xffffffff, 0x00040007,
618         0x916c, 0xffffffff, 0x00060005,
619         0x9170, 0xffffffff, 0x00090008,
620         0x9174, 0xffffffff, 0x00020001,
621         0x9178, 0xffffffff, 0x00040003,
622         0x917c, 0xffffffff, 0x00000007,
623         0x9180, 0xffffffff, 0x00060005,
624         0x9184, 0xffffffff, 0x00090008,
625         0x9188, 0xffffffff, 0x00030002,
626         0x918c, 0xffffffff, 0x00050004,
627         0x9190, 0xffffffff, 0x00000008,
628         0x9194, 0xffffffff, 0x00070006,
629         0x9198, 0xffffffff, 0x000a0009,
630         0x919c, 0xffffffff, 0x00040003,
631         0x91a0, 0xffffffff, 0x00060005,
632         0x91a4, 0xffffffff, 0x00000009,
633         0x91a8, 0xffffffff, 0x00080007,
634         0x91ac, 0xffffffff, 0x000b000a,
635         0x91b0, 0xffffffff, 0x00050004,
636         0x91b4, 0xffffffff, 0x00070006,
637         0x91b8, 0xffffffff, 0x0008000b,
638         0x91bc, 0xffffffff, 0x000a0009,
639         0x91c0, 0xffffffff, 0x000d000c,
640         0x91c4, 0xffffffff, 0x00060005,
641         0x91c8, 0xffffffff, 0x00080007,
642         0x91cc, 0xffffffff, 0x0000000b,
643         0x91d0, 0xffffffff, 0x000a0009,
644         0x91d4, 0xffffffff, 0x000d000c,
645         0x91d8, 0xffffffff, 0x00070006,
646         0x91dc, 0xffffffff, 0x00090008,
647         0x91e0, 0xffffffff, 0x0000000c,
648         0x91e4, 0xffffffff, 0x000b000a,
649         0x91e8, 0xffffffff, 0x000e000d,
650         0x91ec, 0xffffffff, 0x00080007,
651         0x91f0, 0xffffffff, 0x000a0009,
652         0x91f4, 0xffffffff, 0x0000000d,
653         0x91f8, 0xffffffff, 0x000c000b,
654         0x91fc, 0xffffffff, 0x000f000e,
655         0x9200, 0xffffffff, 0x00090008,
656         0x9204, 0xffffffff, 0x000b000a,
657         0x9208, 0xffffffff, 0x000c000f,
658         0x920c, 0xffffffff, 0x000e000d,
659         0x9210, 0xffffffff, 0x00110010,
660         0x9214, 0xffffffff, 0x000a0009,
661         0x9218, 0xffffffff, 0x000c000b,
662         0x921c, 0xffffffff, 0x0000000f,
663         0x9220, 0xffffffff, 0x000e000d,
664         0x9224, 0xffffffff, 0x00110010,
665         0x9228, 0xffffffff, 0x000b000a,
666         0x922c, 0xffffffff, 0x000d000c,
667         0x9230, 0xffffffff, 0x00000010,
668         0x9234, 0xffffffff, 0x000f000e,
669         0x9238, 0xffffffff, 0x00120011,
670         0x923c, 0xffffffff, 0x000c000b,
671         0x9240, 0xffffffff, 0x000e000d,
672         0x9244, 0xffffffff, 0x00000011,
673         0x9248, 0xffffffff, 0x0010000f,
674         0x924c, 0xffffffff, 0x00130012,
675         0x9250, 0xffffffff, 0x000d000c,
676         0x9254, 0xffffffff, 0x000f000e,
677         0x9258, 0xffffffff, 0x00100013,
678         0x925c, 0xffffffff, 0x00120011,
679         0x9260, 0xffffffff, 0x00150014,
680         0x9264, 0xffffffff, 0x000e000d,
681         0x9268, 0xffffffff, 0x0010000f,
682         0x926c, 0xffffffff, 0x00000013,
683         0x9270, 0xffffffff, 0x00120011,
684         0x9274, 0xffffffff, 0x00150014,
685         0x9278, 0xffffffff, 0x000f000e,
686         0x927c, 0xffffffff, 0x00110010,
687         0x9280, 0xffffffff, 0x00000014,
688         0x9284, 0xffffffff, 0x00130012,
689         0x9288, 0xffffffff, 0x00160015,
690         0x928c, 0xffffffff, 0x0010000f,
691         0x9290, 0xffffffff, 0x00120011,
692         0x9294, 0xffffffff, 0x00000015,
693         0x9298, 0xffffffff, 0x00140013,
694         0x929c, 0xffffffff, 0x00170016,
695         0x9150, 0xffffffff, 0x96940200,
696         0x8708, 0xffffffff, 0x00900100,
697         0xc478, 0xffffffff, 0x00000080,
698         0xc404, 0xffffffff, 0x0020003f,
699         0x30, 0xffffffff, 0x0000001c,
700         0x34, 0x000f0000, 0x000f0000,
701         0x160c, 0xffffffff, 0x00000100,
702         0x1024, 0xffffffff, 0x00000100,
703         0x102c, 0x00000101, 0x00000000,
704         0x20a8, 0xffffffff, 0x00000104,
705         0x264c, 0x000c0000, 0x000c0000,
706         0x2648, 0x000c0000, 0x000c0000,
707         0x55e4, 0xff000fff, 0x00000100,
708         0x55e8, 0x00000001, 0x00000001,
709         0x2f50, 0x00000001, 0x00000001,
710         0x30cc, 0xc0000fff, 0x00000104,
711         0xc1e4, 0x00000001, 0x00000001,
712         0xd0c0, 0xfffffff0, 0x00000100,
713         0xd8c0, 0xfffffff0, 0x00000100
714 };
715
716 static const u32 pitcairn_mgcg_cgcg_init[] =
717 {
718         0xc400, 0xffffffff, 0xfffffffc,
719         0x802c, 0xffffffff, 0xe0000000,
720         0x9a60, 0xffffffff, 0x00000100,
721         0x92a4, 0xffffffff, 0x00000100,
722         0xc164, 0xffffffff, 0x00000100,
723         0x9774, 0xffffffff, 0x00000100,
724         0x8984, 0xffffffff, 0x06000100,
725         0x8a18, 0xffffffff, 0x00000100,
726         0x92a0, 0xffffffff, 0x00000100,
727         0xc380, 0xffffffff, 0x00000100,
728         0x8b28, 0xffffffff, 0x00000100,
729         0x9144, 0xffffffff, 0x00000100,
730         0x8d88, 0xffffffff, 0x00000100,
731         0x8d8c, 0xffffffff, 0x00000100,
732         0x9030, 0xffffffff, 0x00000100,
733         0x9034, 0xffffffff, 0x00000100,
734         0x9038, 0xffffffff, 0x00000100,
735         0x903c, 0xffffffff, 0x00000100,
736         0xad80, 0xffffffff, 0x00000100,
737         0xac54, 0xffffffff, 0x00000100,
738         0x897c, 0xffffffff, 0x06000100,
739         0x9868, 0xffffffff, 0x00000100,
740         0x9510, 0xffffffff, 0x00000100,
741         0xaf04, 0xffffffff, 0x00000100,
742         0xae04, 0xffffffff, 0x00000100,
743         0x949c, 0xffffffff, 0x00000100,
744         0x802c, 0xffffffff, 0xe0000000,
745         0x9160, 0xffffffff, 0x00010000,
746         0x9164, 0xffffffff, 0x00030002,
747         0x9168, 0xffffffff, 0x00040007,
748         0x916c, 0xffffffff, 0x00060005,
749         0x9170, 0xffffffff, 0x00090008,
750         0x9174, 0xffffffff, 0x00020001,
751         0x9178, 0xffffffff, 0x00040003,
752         0x917c, 0xffffffff, 0x00000007,
753         0x9180, 0xffffffff, 0x00060005,
754         0x9184, 0xffffffff, 0x00090008,
755         0x9188, 0xffffffff, 0x00030002,
756         0x918c, 0xffffffff, 0x00050004,
757         0x9190, 0xffffffff, 0x00000008,
758         0x9194, 0xffffffff, 0x00070006,
759         0x9198, 0xffffffff, 0x000a0009,
760         0x919c, 0xffffffff, 0x00040003,
761         0x91a0, 0xffffffff, 0x00060005,
762         0x91a4, 0xffffffff, 0x00000009,
763         0x91a8, 0xffffffff, 0x00080007,
764         0x91ac, 0xffffffff, 0x000b000a,
765         0x91b0, 0xffffffff, 0x00050004,
766         0x91b4, 0xffffffff, 0x00070006,
767         0x91b8, 0xffffffff, 0x0008000b,
768         0x91bc, 0xffffffff, 0x000a0009,
769         0x91c0, 0xffffffff, 0x000d000c,
770         0x9200, 0xffffffff, 0x00090008,
771         0x9204, 0xffffffff, 0x000b000a,
772         0x9208, 0xffffffff, 0x000c000f,
773         0x920c, 0xffffffff, 0x000e000d,
774         0x9210, 0xffffffff, 0x00110010,
775         0x9214, 0xffffffff, 0x000a0009,
776         0x9218, 0xffffffff, 0x000c000b,
777         0x921c, 0xffffffff, 0x0000000f,
778         0x9220, 0xffffffff, 0x000e000d,
779         0x9224, 0xffffffff, 0x00110010,
780         0x9228, 0xffffffff, 0x000b000a,
781         0x922c, 0xffffffff, 0x000d000c,
782         0x9230, 0xffffffff, 0x00000010,
783         0x9234, 0xffffffff, 0x000f000e,
784         0x9238, 0xffffffff, 0x00120011,
785         0x923c, 0xffffffff, 0x000c000b,
786         0x9240, 0xffffffff, 0x000e000d,
787         0x9244, 0xffffffff, 0x00000011,
788         0x9248, 0xffffffff, 0x0010000f,
789         0x924c, 0xffffffff, 0x00130012,
790         0x9250, 0xffffffff, 0x000d000c,
791         0x9254, 0xffffffff, 0x000f000e,
792         0x9258, 0xffffffff, 0x00100013,
793         0x925c, 0xffffffff, 0x00120011,
794         0x9260, 0xffffffff, 0x00150014,
795         0x9150, 0xffffffff, 0x96940200,
796         0x8708, 0xffffffff, 0x00900100,
797         0xc478, 0xffffffff, 0x00000080,
798         0xc404, 0xffffffff, 0x0020003f,
799         0x30, 0xffffffff, 0x0000001c,
800         0x34, 0x000f0000, 0x000f0000,
801         0x160c, 0xffffffff, 0x00000100,
802         0x1024, 0xffffffff, 0x00000100,
803         0x102c, 0x00000101, 0x00000000,
804         0x20a8, 0xffffffff, 0x00000104,
805         0x55e4, 0xff000fff, 0x00000100,
806         0x55e8, 0x00000001, 0x00000001,
807         0x2f50, 0x00000001, 0x00000001,
808         0x30cc, 0xc0000fff, 0x00000104,
809         0xc1e4, 0x00000001, 0x00000001,
810         0xd0c0, 0xfffffff0, 0x00000100,
811         0xd8c0, 0xfffffff0, 0x00000100
812 };
813
814 static const u32 verde_mgcg_cgcg_init[] =
815 {
816         0xc400, 0xffffffff, 0xfffffffc,
817         0x802c, 0xffffffff, 0xe0000000,
818         0x9a60, 0xffffffff, 0x00000100,
819         0x92a4, 0xffffffff, 0x00000100,
820         0xc164, 0xffffffff, 0x00000100,
821         0x9774, 0xffffffff, 0x00000100,
822         0x8984, 0xffffffff, 0x06000100,
823         0x8a18, 0xffffffff, 0x00000100,
824         0x92a0, 0xffffffff, 0x00000100,
825         0xc380, 0xffffffff, 0x00000100,
826         0x8b28, 0xffffffff, 0x00000100,
827         0x9144, 0xffffffff, 0x00000100,
828         0x8d88, 0xffffffff, 0x00000100,
829         0x8d8c, 0xffffffff, 0x00000100,
830         0x9030, 0xffffffff, 0x00000100,
831         0x9034, 0xffffffff, 0x00000100,
832         0x9038, 0xffffffff, 0x00000100,
833         0x903c, 0xffffffff, 0x00000100,
834         0xad80, 0xffffffff, 0x00000100,
835         0xac54, 0xffffffff, 0x00000100,
836         0x897c, 0xffffffff, 0x06000100,
837         0x9868, 0xffffffff, 0x00000100,
838         0x9510, 0xffffffff, 0x00000100,
839         0xaf04, 0xffffffff, 0x00000100,
840         0xae04, 0xffffffff, 0x00000100,
841         0x949c, 0xffffffff, 0x00000100,
842         0x802c, 0xffffffff, 0xe0000000,
843         0x9160, 0xffffffff, 0x00010000,
844         0x9164, 0xffffffff, 0x00030002,
845         0x9168, 0xffffffff, 0x00040007,
846         0x916c, 0xffffffff, 0x00060005,
847         0x9170, 0xffffffff, 0x00090008,
848         0x9174, 0xffffffff, 0x00020001,
849         0x9178, 0xffffffff, 0x00040003,
850         0x917c, 0xffffffff, 0x00000007,
851         0x9180, 0xffffffff, 0x00060005,
852         0x9184, 0xffffffff, 0x00090008,
853         0x9188, 0xffffffff, 0x00030002,
854         0x918c, 0xffffffff, 0x00050004,
855         0x9190, 0xffffffff, 0x00000008,
856         0x9194, 0xffffffff, 0x00070006,
857         0x9198, 0xffffffff, 0x000a0009,
858         0x919c, 0xffffffff, 0x00040003,
859         0x91a0, 0xffffffff, 0x00060005,
860         0x91a4, 0xffffffff, 0x00000009,
861         0x91a8, 0xffffffff, 0x00080007,
862         0x91ac, 0xffffffff, 0x000b000a,
863         0x91b0, 0xffffffff, 0x00050004,
864         0x91b4, 0xffffffff, 0x00070006,
865         0x91b8, 0xffffffff, 0x0008000b,
866         0x91bc, 0xffffffff, 0x000a0009,
867         0x91c0, 0xffffffff, 0x000d000c,
868         0x9200, 0xffffffff, 0x00090008,
869         0x9204, 0xffffffff, 0x000b000a,
870         0x9208, 0xffffffff, 0x000c000f,
871         0x920c, 0xffffffff, 0x000e000d,
872         0x9210, 0xffffffff, 0x00110010,
873         0x9214, 0xffffffff, 0x000a0009,
874         0x9218, 0xffffffff, 0x000c000b,
875         0x921c, 0xffffffff, 0x0000000f,
876         0x9220, 0xffffffff, 0x000e000d,
877         0x9224, 0xffffffff, 0x00110010,
878         0x9228, 0xffffffff, 0x000b000a,
879         0x922c, 0xffffffff, 0x000d000c,
880         0x9230, 0xffffffff, 0x00000010,
881         0x9234, 0xffffffff, 0x000f000e,
882         0x9238, 0xffffffff, 0x00120011,
883         0x923c, 0xffffffff, 0x000c000b,
884         0x9240, 0xffffffff, 0x000e000d,
885         0x9244, 0xffffffff, 0x00000011,
886         0x9248, 0xffffffff, 0x0010000f,
887         0x924c, 0xffffffff, 0x00130012,
888         0x9250, 0xffffffff, 0x000d000c,
889         0x9254, 0xffffffff, 0x000f000e,
890         0x9258, 0xffffffff, 0x00100013,
891         0x925c, 0xffffffff, 0x00120011,
892         0x9260, 0xffffffff, 0x00150014,
893         0x9150, 0xffffffff, 0x96940200,
894         0x8708, 0xffffffff, 0x00900100,
895         0xc478, 0xffffffff, 0x00000080,
896         0xc404, 0xffffffff, 0x0020003f,
897         0x30, 0xffffffff, 0x0000001c,
898         0x34, 0x000f0000, 0x000f0000,
899         0x160c, 0xffffffff, 0x00000100,
900         0x1024, 0xffffffff, 0x00000100,
901         0x102c, 0x00000101, 0x00000000,
902         0x20a8, 0xffffffff, 0x00000104,
903         0x264c, 0x000c0000, 0x000c0000,
904         0x2648, 0x000c0000, 0x000c0000,
905         0x55e4, 0xff000fff, 0x00000100,
906         0x55e8, 0x00000001, 0x00000001,
907         0x2f50, 0x00000001, 0x00000001,
908         0x30cc, 0xc0000fff, 0x00000104,
909         0xc1e4, 0x00000001, 0x00000001,
910         0xd0c0, 0xfffffff0, 0x00000100,
911         0xd8c0, 0xfffffff0, 0x00000100
912 };
913
914 static const u32 oland_mgcg_cgcg_init[] =
915 {
916         0xc400, 0xffffffff, 0xfffffffc,
917         0x802c, 0xffffffff, 0xe0000000,
918         0x9a60, 0xffffffff, 0x00000100,
919         0x92a4, 0xffffffff, 0x00000100,
920         0xc164, 0xffffffff, 0x00000100,
921         0x9774, 0xffffffff, 0x00000100,
922         0x8984, 0xffffffff, 0x06000100,
923         0x8a18, 0xffffffff, 0x00000100,
924         0x92a0, 0xffffffff, 0x00000100,
925         0xc380, 0xffffffff, 0x00000100,
926         0x8b28, 0xffffffff, 0x00000100,
927         0x9144, 0xffffffff, 0x00000100,
928         0x8d88, 0xffffffff, 0x00000100,
929         0x8d8c, 0xffffffff, 0x00000100,
930         0x9030, 0xffffffff, 0x00000100,
931         0x9034, 0xffffffff, 0x00000100,
932         0x9038, 0xffffffff, 0x00000100,
933         0x903c, 0xffffffff, 0x00000100,
934         0xad80, 0xffffffff, 0x00000100,
935         0xac54, 0xffffffff, 0x00000100,
936         0x897c, 0xffffffff, 0x06000100,
937         0x9868, 0xffffffff, 0x00000100,
938         0x9510, 0xffffffff, 0x00000100,
939         0xaf04, 0xffffffff, 0x00000100,
940         0xae04, 0xffffffff, 0x00000100,
941         0x949c, 0xffffffff, 0x00000100,
942         0x802c, 0xffffffff, 0xe0000000,
943         0x9160, 0xffffffff, 0x00010000,
944         0x9164, 0xffffffff, 0x00030002,
945         0x9168, 0xffffffff, 0x00040007,
946         0x916c, 0xffffffff, 0x00060005,
947         0x9170, 0xffffffff, 0x00090008,
948         0x9174, 0xffffffff, 0x00020001,
949         0x9178, 0xffffffff, 0x00040003,
950         0x917c, 0xffffffff, 0x00000007,
951         0x9180, 0xffffffff, 0x00060005,
952         0x9184, 0xffffffff, 0x00090008,
953         0x9188, 0xffffffff, 0x00030002,
954         0x918c, 0xffffffff, 0x00050004,
955         0x9190, 0xffffffff, 0x00000008,
956         0x9194, 0xffffffff, 0x00070006,
957         0x9198, 0xffffffff, 0x000a0009,
958         0x919c, 0xffffffff, 0x00040003,
959         0x91a0, 0xffffffff, 0x00060005,
960         0x91a4, 0xffffffff, 0x00000009,
961         0x91a8, 0xffffffff, 0x00080007,
962         0x91ac, 0xffffffff, 0x000b000a,
963         0x91b0, 0xffffffff, 0x00050004,
964         0x91b4, 0xffffffff, 0x00070006,
965         0x91b8, 0xffffffff, 0x0008000b,
966         0x91bc, 0xffffffff, 0x000a0009,
967         0x91c0, 0xffffffff, 0x000d000c,
968         0x91c4, 0xffffffff, 0x00060005,
969         0x91c8, 0xffffffff, 0x00080007,
970         0x91cc, 0xffffffff, 0x0000000b,
971         0x91d0, 0xffffffff, 0x000a0009,
972         0x91d4, 0xffffffff, 0x000d000c,
973         0x9150, 0xffffffff, 0x96940200,
974         0x8708, 0xffffffff, 0x00900100,
975         0xc478, 0xffffffff, 0x00000080,
976         0xc404, 0xffffffff, 0x0020003f,
977         0x30, 0xffffffff, 0x0000001c,
978         0x34, 0x000f0000, 0x000f0000,
979         0x160c, 0xffffffff, 0x00000100,
980         0x1024, 0xffffffff, 0x00000100,
981         0x102c, 0x00000101, 0x00000000,
982         0x20a8, 0xffffffff, 0x00000104,
983         0x264c, 0x000c0000, 0x000c0000,
984         0x2648, 0x000c0000, 0x000c0000,
985         0x55e4, 0xff000fff, 0x00000100,
986         0x55e8, 0x00000001, 0x00000001,
987         0x2f50, 0x00000001, 0x00000001,
988         0x30cc, 0xc0000fff, 0x00000104,
989         0xc1e4, 0x00000001, 0x00000001,
990         0xd0c0, 0xfffffff0, 0x00000100,
991         0xd8c0, 0xfffffff0, 0x00000100
992 };
993
994 static const u32 hainan_mgcg_cgcg_init[] =
995 {
996         0xc400, 0xffffffff, 0xfffffffc,
997         0x802c, 0xffffffff, 0xe0000000,
998         0x9a60, 0xffffffff, 0x00000100,
999         0x92a4, 0xffffffff, 0x00000100,
1000         0xc164, 0xffffffff, 0x00000100,
1001         0x9774, 0xffffffff, 0x00000100,
1002         0x8984, 0xffffffff, 0x06000100,
1003         0x8a18, 0xffffffff, 0x00000100,
1004         0x92a0, 0xffffffff, 0x00000100,
1005         0xc380, 0xffffffff, 0x00000100,
1006         0x8b28, 0xffffffff, 0x00000100,
1007         0x9144, 0xffffffff, 0x00000100,
1008         0x8d88, 0xffffffff, 0x00000100,
1009         0x8d8c, 0xffffffff, 0x00000100,
1010         0x9030, 0xffffffff, 0x00000100,
1011         0x9034, 0xffffffff, 0x00000100,
1012         0x9038, 0xffffffff, 0x00000100,
1013         0x903c, 0xffffffff, 0x00000100,
1014         0xad80, 0xffffffff, 0x00000100,
1015         0xac54, 0xffffffff, 0x00000100,
1016         0x897c, 0xffffffff, 0x06000100,
1017         0x9868, 0xffffffff, 0x00000100,
1018         0x9510, 0xffffffff, 0x00000100,
1019         0xaf04, 0xffffffff, 0x00000100,
1020         0xae04, 0xffffffff, 0x00000100,
1021         0x949c, 0xffffffff, 0x00000100,
1022         0x802c, 0xffffffff, 0xe0000000,
1023         0x9160, 0xffffffff, 0x00010000,
1024         0x9164, 0xffffffff, 0x00030002,
1025         0x9168, 0xffffffff, 0x00040007,
1026         0x916c, 0xffffffff, 0x00060005,
1027         0x9170, 0xffffffff, 0x00090008,
1028         0x9174, 0xffffffff, 0x00020001,
1029         0x9178, 0xffffffff, 0x00040003,
1030         0x917c, 0xffffffff, 0x00000007,
1031         0x9180, 0xffffffff, 0x00060005,
1032         0x9184, 0xffffffff, 0x00090008,
1033         0x9188, 0xffffffff, 0x00030002,
1034         0x918c, 0xffffffff, 0x00050004,
1035         0x9190, 0xffffffff, 0x00000008,
1036         0x9194, 0xffffffff, 0x00070006,
1037         0x9198, 0xffffffff, 0x000a0009,
1038         0x919c, 0xffffffff, 0x00040003,
1039         0x91a0, 0xffffffff, 0x00060005,
1040         0x91a4, 0xffffffff, 0x00000009,
1041         0x91a8, 0xffffffff, 0x00080007,
1042         0x91ac, 0xffffffff, 0x000b000a,
1043         0x91b0, 0xffffffff, 0x00050004,
1044         0x91b4, 0xffffffff, 0x00070006,
1045         0x91b8, 0xffffffff, 0x0008000b,
1046         0x91bc, 0xffffffff, 0x000a0009,
1047         0x91c0, 0xffffffff, 0x000d000c,
1048         0x91c4, 0xffffffff, 0x00060005,
1049         0x91c8, 0xffffffff, 0x00080007,
1050         0x91cc, 0xffffffff, 0x0000000b,
1051         0x91d0, 0xffffffff, 0x000a0009,
1052         0x91d4, 0xffffffff, 0x000d000c,
1053         0x9150, 0xffffffff, 0x96940200,
1054         0x8708, 0xffffffff, 0x00900100,
1055         0xc478, 0xffffffff, 0x00000080,
1056         0xc404, 0xffffffff, 0x0020003f,
1057         0x30, 0xffffffff, 0x0000001c,
1058         0x34, 0x000f0000, 0x000f0000,
1059         0x160c, 0xffffffff, 0x00000100,
1060         0x1024, 0xffffffff, 0x00000100,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x264c, 0x000c0000, 0x000c0000,
1063         0x2648, 0x000c0000, 0x000c0000,
1064         0x2f50, 0x00000001, 0x00000001,
1065         0x30cc, 0xc0000fff, 0x00000104,
1066         0xc1e4, 0x00000001, 0x00000001,
1067         0xd0c0, 0xfffffff0, 0x00000100,
1068         0xd8c0, 0xfffffff0, 0x00000100
1069 };
1070
1071 static u32 verde_pg_init[] =
1072 {
1073         0x353c, 0xffffffff, 0x40000,
1074         0x3538, 0xffffffff, 0x200010ff,
1075         0x353c, 0xffffffff, 0x0,
1076         0x353c, 0xffffffff, 0x0,
1077         0x353c, 0xffffffff, 0x0,
1078         0x353c, 0xffffffff, 0x0,
1079         0x353c, 0xffffffff, 0x0,
1080         0x353c, 0xffffffff, 0x7007,
1081         0x3538, 0xffffffff, 0x300010ff,
1082         0x353c, 0xffffffff, 0x0,
1083         0x353c, 0xffffffff, 0x0,
1084         0x353c, 0xffffffff, 0x0,
1085         0x353c, 0xffffffff, 0x0,
1086         0x353c, 0xffffffff, 0x0,
1087         0x353c, 0xffffffff, 0x400000,
1088         0x3538, 0xffffffff, 0x100010ff,
1089         0x353c, 0xffffffff, 0x0,
1090         0x353c, 0xffffffff, 0x0,
1091         0x353c, 0xffffffff, 0x0,
1092         0x353c, 0xffffffff, 0x0,
1093         0x353c, 0xffffffff, 0x0,
1094         0x353c, 0xffffffff, 0x120200,
1095         0x3538, 0xffffffff, 0x500010ff,
1096         0x353c, 0xffffffff, 0x0,
1097         0x353c, 0xffffffff, 0x0,
1098         0x353c, 0xffffffff, 0x0,
1099         0x353c, 0xffffffff, 0x0,
1100         0x353c, 0xffffffff, 0x0,
1101         0x353c, 0xffffffff, 0x1e1e16,
1102         0x3538, 0xffffffff, 0x600010ff,
1103         0x353c, 0xffffffff, 0x0,
1104         0x353c, 0xffffffff, 0x0,
1105         0x353c, 0xffffffff, 0x0,
1106         0x353c, 0xffffffff, 0x0,
1107         0x353c, 0xffffffff, 0x0,
1108         0x353c, 0xffffffff, 0x171f1e,
1109         0x3538, 0xffffffff, 0x700010ff,
1110         0x353c, 0xffffffff, 0x0,
1111         0x353c, 0xffffffff, 0x0,
1112         0x353c, 0xffffffff, 0x0,
1113         0x353c, 0xffffffff, 0x0,
1114         0x353c, 0xffffffff, 0x0,
1115         0x353c, 0xffffffff, 0x0,
1116         0x3538, 0xffffffff, 0x9ff,
1117         0x3500, 0xffffffff, 0x0,
1118         0x3504, 0xffffffff, 0x10000800,
1119         0x3504, 0xffffffff, 0xf,
1120         0x3504, 0xffffffff, 0xf,
1121         0x3500, 0xffffffff, 0x4,
1122         0x3504, 0xffffffff, 0x1000051e,
1123         0x3504, 0xffffffff, 0xffff,
1124         0x3504, 0xffffffff, 0xffff,
1125         0x3500, 0xffffffff, 0x8,
1126         0x3504, 0xffffffff, 0x80500,
1127         0x3500, 0xffffffff, 0x12,
1128         0x3504, 0xffffffff, 0x9050c,
1129         0x3500, 0xffffffff, 0x1d,
1130         0x3504, 0xffffffff, 0xb052c,
1131         0x3500, 0xffffffff, 0x2a,
1132         0x3504, 0xffffffff, 0x1053e,
1133         0x3500, 0xffffffff, 0x2d,
1134         0x3504, 0xffffffff, 0x10546,
1135         0x3500, 0xffffffff, 0x30,
1136         0x3504, 0xffffffff, 0xa054e,
1137         0x3500, 0xffffffff, 0x3c,
1138         0x3504, 0xffffffff, 0x1055f,
1139         0x3500, 0xffffffff, 0x3f,
1140         0x3504, 0xffffffff, 0x10567,
1141         0x3500, 0xffffffff, 0x42,
1142         0x3504, 0xffffffff, 0x1056f,
1143         0x3500, 0xffffffff, 0x45,
1144         0x3504, 0xffffffff, 0x10572,
1145         0x3500, 0xffffffff, 0x48,
1146         0x3504, 0xffffffff, 0x20575,
1147         0x3500, 0xffffffff, 0x4c,
1148         0x3504, 0xffffffff, 0x190801,
1149         0x3500, 0xffffffff, 0x67,
1150         0x3504, 0xffffffff, 0x1082a,
1151         0x3500, 0xffffffff, 0x6a,
1152         0x3504, 0xffffffff, 0x1b082d,
1153         0x3500, 0xffffffff, 0x87,
1154         0x3504, 0xffffffff, 0x310851,
1155         0x3500, 0xffffffff, 0xba,
1156         0x3504, 0xffffffff, 0x891,
1157         0x3500, 0xffffffff, 0xbc,
1158         0x3504, 0xffffffff, 0x893,
1159         0x3500, 0xffffffff, 0xbe,
1160         0x3504, 0xffffffff, 0x20895,
1161         0x3500, 0xffffffff, 0xc2,
1162         0x3504, 0xffffffff, 0x20899,
1163         0x3500, 0xffffffff, 0xc6,
1164         0x3504, 0xffffffff, 0x2089d,
1165         0x3500, 0xffffffff, 0xca,
1166         0x3504, 0xffffffff, 0x8a1,
1167         0x3500, 0xffffffff, 0xcc,
1168         0x3504, 0xffffffff, 0x8a3,
1169         0x3500, 0xffffffff, 0xce,
1170         0x3504, 0xffffffff, 0x308a5,
1171         0x3500, 0xffffffff, 0xd3,
1172         0x3504, 0xffffffff, 0x6d08cd,
1173         0x3500, 0xffffffff, 0x142,
1174         0x3504, 0xffffffff, 0x2000095a,
1175         0x3504, 0xffffffff, 0x1,
1176         0x3500, 0xffffffff, 0x144,
1177         0x3504, 0xffffffff, 0x301f095b,
1178         0x3500, 0xffffffff, 0x165,
1179         0x3504, 0xffffffff, 0xc094d,
1180         0x3500, 0xffffffff, 0x173,
1181         0x3504, 0xffffffff, 0xf096d,
1182         0x3500, 0xffffffff, 0x184,
1183         0x3504, 0xffffffff, 0x15097f,
1184         0x3500, 0xffffffff, 0x19b,
1185         0x3504, 0xffffffff, 0xc0998,
1186         0x3500, 0xffffffff, 0x1a9,
1187         0x3504, 0xffffffff, 0x409a7,
1188         0x3500, 0xffffffff, 0x1af,
1189         0x3504, 0xffffffff, 0xcdc,
1190         0x3500, 0xffffffff, 0x1b1,
1191         0x3504, 0xffffffff, 0x800,
1192         0x3508, 0xffffffff, 0x6c9b2000,
1193         0x3510, 0xfc00, 0x2000,
1194         0x3544, 0xffffffff, 0xfc0,
1195         0x28d4, 0x00000100, 0x100
1196 };
1197
1198 static void si_init_golden_registers(struct radeon_device *rdev)
1199 {
1200         switch (rdev->family) {
1201         case CHIP_TAHITI:
1202                 radeon_program_register_sequence(rdev,
1203                                                  tahiti_golden_registers,
1204                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1205                 radeon_program_register_sequence(rdev,
1206                                                  tahiti_golden_rlc_registers,
1207                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1208                 radeon_program_register_sequence(rdev,
1209                                                  tahiti_mgcg_cgcg_init,
1210                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1211                 radeon_program_register_sequence(rdev,
1212                                                  tahiti_golden_registers2,
1213                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1214                 break;
1215         case CHIP_PITCAIRN:
1216                 radeon_program_register_sequence(rdev,
1217                                                  pitcairn_golden_registers,
1218                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1219                 radeon_program_register_sequence(rdev,
1220                                                  pitcairn_golden_rlc_registers,
1221                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1222                 radeon_program_register_sequence(rdev,
1223                                                  pitcairn_mgcg_cgcg_init,
1224                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1225                 break;
1226         case CHIP_VERDE:
1227                 radeon_program_register_sequence(rdev,
1228                                                  verde_golden_registers,
1229                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1230                 radeon_program_register_sequence(rdev,
1231                                                  verde_golden_rlc_registers,
1232                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1233                 radeon_program_register_sequence(rdev,
1234                                                  verde_mgcg_cgcg_init,
1235                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1236                 radeon_program_register_sequence(rdev,
1237                                                  verde_pg_init,
1238                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1239                 break;
1240         case CHIP_OLAND:
1241                 radeon_program_register_sequence(rdev,
1242                                                  oland_golden_registers,
1243                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1244                 radeon_program_register_sequence(rdev,
1245                                                  oland_golden_rlc_registers,
1246                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1247                 radeon_program_register_sequence(rdev,
1248                                                  oland_mgcg_cgcg_init,
1249                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1250                 break;
1251         case CHIP_HAINAN:
1252                 radeon_program_register_sequence(rdev,
1253                                                  hainan_golden_registers,
1254                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1255                 radeon_program_register_sequence(rdev,
1256                                                  hainan_golden_registers2,
1257                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1258                 radeon_program_register_sequence(rdev,
1259                                                  hainan_mgcg_cgcg_init,
1260                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1261                 break;
1262         default:
1263                 break;
1264         }
1265 }
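/*
 * For reference, radeon_program_register_sequence() (radeon_device.c)
 * walks the tables above roughly as follows (paraphrased, not verbatim):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg      = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask  = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 *
 * i.e. a mask of 0xffffffff overwrites the register outright, anything
 * else is a read-modify-write of just the masked bits.
 */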
1266
1267 #define PCIE_BUS_CLK                10000
1268 #define TCLK                        (PCIE_BUS_CLK / 10)
1269
1270 /**
1271  * si_get_xclk - get the xclk
1272  *
1273  * @rdev: radeon_device pointer
1274  *
1275  * Returns the reference clock used by the gfx engine
1276  * (SI).
1277  */
1278 u32 si_get_xclk(struct radeon_device *rdev)
1279 {
1280         u32 reference_clock = rdev->clock.spll.reference_freq;
1281         u32 tmp;
1282
1283         tmp = RREG32(CG_CLKPIN_CNTL_2);
1284         if (tmp & MUX_TCLK_TO_XCLK)
1285                 return TCLK;
1286
1287         tmp = RREG32(CG_CLKPIN_CNTL);
1288         if (tmp & XTALIN_DIVIDE)
1289                 return reference_clock / 4;
1290
1291         return reference_clock;
1292 }
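/*
 * Clock values here are in the driver's usual 10 kHz units, so
 * PCIE_BUS_CLK (10000) is 100 MHz and TCLK is 10 MHz.  With a typical
 * 27 MHz reference crystal (reference_freq == 2700), si_get_xclk()
 * returns 2700, or 675 if XTALIN_DIVIDE is set.
 */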
1293
1294 /* get temperature in millidegrees */
1295 int si_get_temp(struct radeon_device *rdev)
1296 {
1297         u32 temp;
1298         int actual_temp = 0;
1299
1300         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1301                 CTF_TEMP_SHIFT;
1302
1303         if (temp & 0x200)
1304                 actual_temp = 255;
1305         else
1306                 actual_temp = temp & 0x1ff;
1307
1308         actual_temp = (actual_temp * 1000);
1309
1310         return actual_temp;
1311 }
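/*
 * Example: a raw CTF_TEMP field of 0x3c decodes to 60 degrees C and is
 * returned as 60000 millidegrees; readings with bit 9 set are clamped to
 * the 255 C ceiling above.
 */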
1312
1313 #define TAHITI_IO_MC_REGS_SIZE 36
1314
1315 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1316         {0x0000006f, 0x03044000},
1317         {0x00000070, 0x0480c018},
1318         {0x00000071, 0x00000040},
1319         {0x00000072, 0x01000000},
1320         {0x00000074, 0x000000ff},
1321         {0x00000075, 0x00143400},
1322         {0x00000076, 0x08ec0800},
1323         {0x00000077, 0x040000cc},
1324         {0x00000079, 0x00000000},
1325         {0x0000007a, 0x21000409},
1326         {0x0000007c, 0x00000000},
1327         {0x0000007d, 0xe8000000},
1328         {0x0000007e, 0x044408a8},
1329         {0x0000007f, 0x00000003},
1330         {0x00000080, 0x00000000},
1331         {0x00000081, 0x01000000},
1332         {0x00000082, 0x02000000},
1333         {0x00000083, 0x00000000},
1334         {0x00000084, 0xe3f3e4f4},
1335         {0x00000085, 0x00052024},
1336         {0x00000087, 0x00000000},
1337         {0x00000088, 0x66036603},
1338         {0x00000089, 0x01000000},
1339         {0x0000008b, 0x1c0a0000},
1340         {0x0000008c, 0xff010000},
1341         {0x0000008e, 0xffffefff},
1342         {0x0000008f, 0xfff3efff},
1343         {0x00000090, 0xfff3efbf},
1344         {0x00000094, 0x00101101},
1345         {0x00000095, 0x00000fff},
1346         {0x00000096, 0x00116fff},
1347         {0x00000097, 0x60010000},
1348         {0x00000098, 0x10010000},
1349         {0x00000099, 0x00006000},
1350         {0x0000009a, 0x00001000},
1351         {0x0000009f, 0x00a77400}
1352 };
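/*
 * This table and the per-ASIC variants that follow are, as best I can
 * tell, { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pairs that
 * si_mc_load_microcode() programs before uploading the MC ucode.  The
 * tables shown here are identical except for the final 0x0000009f entry.
 */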
1353
1354 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1355         {0x0000006f, 0x03044000},
1356         {0x00000070, 0x0480c018},
1357         {0x00000071, 0x00000040},
1358         {0x00000072, 0x01000000},
1359         {0x00000074, 0x000000ff},
1360         {0x00000075, 0x00143400},
1361         {0x00000076, 0x08ec0800},
1362         {0x00000077, 0x040000cc},
1363         {0x00000079, 0x00000000},
1364         {0x0000007a, 0x21000409},
1365         {0x0000007c, 0x00000000},
1366         {0x0000007d, 0xe8000000},
1367         {0x0000007e, 0x044408a8},
1368         {0x0000007f, 0x00000003},
1369         {0x00000080, 0x00000000},
1370         {0x00000081, 0x01000000},
1371         {0x00000082, 0x02000000},
1372         {0x00000083, 0x00000000},
1373         {0x00000084, 0xe3f3e4f4},
1374         {0x00000085, 0x00052024},
1375         {0x00000087, 0x00000000},
1376         {0x00000088, 0x66036603},
1377         {0x00000089, 0x01000000},
1378         {0x0000008b, 0x1c0a0000},
1379         {0x0000008c, 0xff010000},
1380         {0x0000008e, 0xffffefff},
1381         {0x0000008f, 0xfff3efff},
1382         {0x00000090, 0xfff3efbf},
1383         {0x00000094, 0x00101101},
1384         {0x00000095, 0x00000fff},
1385         {0x00000096, 0x00116fff},
1386         {0x00000097, 0x60010000},
1387         {0x00000098, 0x10010000},
1388         {0x00000099, 0x00006000},
1389         {0x0000009a, 0x00001000},
1390         {0x0000009f, 0x00a47400}
1391 };
1392
1393 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1394         {0x0000006f, 0x03044000},
1395         {0x00000070, 0x0480c018},
1396         {0x00000071, 0x00000040},
1397         {0x00000072, 0x01000000},
1398         {0x00000074, 0x000000ff},
1399         {0x00000075, 0x00143400},
1400         {0x00000076, 0x08ec0800},
1401         {0x00000077, 0x040000cc},
1402         {0x00000079, 0x00000000},
1403         {0x0000007a, 0x21000409},
1404         {0x0000007c, 0x00000000},
1405         {0x0000007d, 0xe8000000},
1406         {0x0000007e, 0x044408a8},
1407         {0x0000007f, 0x00000003},
1408         {0x00000080, 0x00000000},
1409         {0x00000081, 0x01000000},
1410         {0x00000082, 0x02000000},
1411         {0x00000083, 0x00000000},
1412         {0x00000084, 0xe3f3e4f4},
1413         {0x00000085, 0x00052024},
1414         {0x00000087, 0x00000000},
1415         {0x00000088, 0x66036603},
1416         {0x00000089, 0x01000000},
1417         {0x0000008b, 0x1c0a0000},
1418         {0x0000008c, 0xff010000},
1419         {0x0000008e, 0xffffefff},
1420         {0x0000008f, 0xfff3efff},
1421         {0x00000090, 0xfff3efbf},
1422         {0x00000094, 0x00101101},
1423         {0x00000095, 0x00000fff},
1424         {0x00000096, 0x00116fff},
1425         {0x00000097, 0x60010000},
1426         {0x00000098, 0x10010000},
1427         {0x00000099, 0x00006000},
1428         {0x0000009a, 0x00001000},
1429         {0x0000009f, 0x00a37400}
1430 };
1431
1432 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1433         {0x0000006f, 0x03044000},
1434         {0x00000070, 0x0480c018},
1435         {0x00000071, 0x00000040},
1436         {0x00000072, 0x01000000},
1437         {0x00000074, 0x000000ff},
1438         {0x00000075, 0x00143400},
1439         {0x00000076, 0x08ec0800},
1440         {0x00000077, 0x040000cc},
1441         {0x00000079, 0x00000000},
1442         {0x0000007a, 0x21000409},
1443         {0x0000007c, 0x00000000},
1444         {0x0000007d, 0xe8000000},
1445         {0x0000007e, 0x044408a8},
1446         {0x0000007f, 0x00000003},
1447         {0x00000080, 0x00000000},
1448         {0x00000081, 0x01000000},
1449         {0x00000082, 0x02000000},
1450         {0x00000083, 0x00000000},
1451         {0x00000084, 0xe3f3e4f4},
1452         {0x00000085, 0x00052024},
1453         {0x00000087, 0x00000000},
1454         {0x00000088, 0x66036603},
1455         {0x00000089, 0x01000000},
1456         {0x0000008b, 0x1c0a0000},
1457         {0x0000008c, 0xff010000},
1458         {0x0000008e, 0xffffefff},
1459         {0x0000008f, 0xfff3efff},
1460         {0x00000090, 0xfff3efbf},
1461         {0x00000094, 0x00101101},
1462         {0x00000095, 0x00000fff},
1463         {0x00000096, 0x00116fff},
1464         {0x00000097, 0x60010000},
1465         {0x00000098, 0x10010000},
1466         {0x00000099, 0x00006000},
1467         {0x0000009a, 0x00001000},
1468         {0x0000009f, 0x00a17730}
1469 };
1470
1471 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1472         {0x0000006f, 0x03044000},
1473         {0x00000070, 0x0480c018},
1474         {0x00000071, 0x00000040},
1475         {0x00000072, 0x01000000},
1476         {0x00000074, 0x000000ff},
1477         {0x00000075, 0x00143400},
1478         {0x00000076, 0x08ec0800},
1479         {0x00000077, 0x040000cc},
1480         {0x00000079, 0x00000000},
1481         {0x0000007a, 0x21000409},
1482         {0x0000007c, 0x00000000},
1483         {0x0000007d, 0xe8000000},
1484         {0x0000007e, 0x044408a8},
1485         {0x0000007f, 0x00000003},
1486         {0x00000080, 0x00000000},
1487         {0x00000081, 0x01000000},
1488         {0x00000082, 0x02000000},
1489         {0x00000083, 0x00000000},
1490         {0x00000084, 0xe3f3e4f4},
1491         {0x00000085, 0x00052024},
1492         {0x00000087, 0x00000000},
1493         {0x00000088, 0x66036603},
1494         {0x00000089, 0x01000000},
1495         {0x0000008b, 0x1c0a0000},
1496         {0x0000008c, 0xff010000},
1497         {0x0000008e, 0xffffefff},
1498         {0x0000008f, 0xfff3efff},
1499         {0x00000090, 0xfff3efbf},
1500         {0x00000094, 0x00101101},
1501         {0x00000095, 0x00000fff},
1502         {0x00000096, 0x00116fff},
1503         {0x00000097, 0x60010000},
1504         {0x00000098, 0x10010000},
1505         {0x00000099, 0x00006000},
1506         {0x0000009a, 0x00001000},
1507         {0x0000009f, 0x00a07730}
1508 };
1509
1510 /* ucode loading */
1511 int si_mc_load_microcode(struct radeon_device *rdev)
1512 {
1513         const __be32 *fw_data = NULL;
1514         const __le32 *new_fw_data = NULL;
1515         u32 running, blackout = 0;
1516         u32 *io_mc_regs = NULL;
1517         const __le32 *new_io_mc_regs = NULL;
1518         int i, regs_size, ucode_size;
1519
1520         if (!rdev->mc_fw)
1521                 return -EINVAL;
1522
1523         if (rdev->new_fw) {
1524                 const struct mc_firmware_header_v1_0 *hdr =
1525                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1526
1527                 radeon_ucode_print_mc_hdr(&hdr->header);
1528                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1529                 new_io_mc_regs = (const __le32 *)
1530                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1531                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1532                 new_fw_data = (const __le32 *)
1533                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1534         } else {
1535                 ucode_size = rdev->mc_fw->size / 4;
1536
1537                 switch (rdev->family) {
1538                 case CHIP_TAHITI:
1539                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1540                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1541                         break;
1542                 case CHIP_PITCAIRN:
1543                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1544                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1545                         break;
1546                 case CHIP_VERDE:
1547                 default:
1548                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1549                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1550                         break;
1551                 case CHIP_OLAND:
1552                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1553                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1554                         break;
1555                 case CHIP_HAINAN:
1556                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1557                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1558                         break;
1559                 }
1560                 fw_data = (const __be32 *)rdev->mc_fw->data;
1561         }
1562
1563         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1564
1565         if (running == 0) {
1566                 if (running) { /* never taken: running == 0 in this branch */
1567                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1568                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1569                 }
1570
1571                 /* reset the engine and set to writable */
1572                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1573                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1574
1575                 /* load mc io regs */
1576                 for (i = 0; i < regs_size; i++) {
1577                         if (rdev->new_fw) {
1578                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1579                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1580                         } else {
1581                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1582                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1583                         }
1584                 }
1585                 /* load the MC ucode */
1586                 for (i = 0; i < ucode_size; i++) {
1587                         if (rdev->new_fw)
1588                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1589                         else
1590                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1591                 }
1592
1593                 /* put the engine back into the active state */
1594                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1595                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1596                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1597
1598                 /* wait for training to complete */
1599                 for (i = 0; i < rdev->usec_timeout; i++) {
1600                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1601                                 break;
1602                         udelay(1);
1603                 }
1604                 for (i = 0; i < rdev->usec_timeout; i++) {
1605                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1606                                 break;
1607                         udelay(1);
1608                 }
1609
1610                 if (running) /* never taken: running == 0 in this branch */
1611                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1612         }
1613
1614         return 0;
1615 }
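/*
 * Illustrative note (not from the original source): the legacy MC firmware
 * images are a raw big-endian stream whose dword count is simply the file
 * size / 4, with the register pairs supplied by the built-in tables above.
 * The unified ("new") firmware instead carries an mc_firmware_header_v1_0
 * describing both the little-endian register-pair block and the ucode block,
 * which is why the loader has separate be32_to_cpup()/le32_to_cpup() paths.
 */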
1616
1617 static int si_init_microcode(struct radeon_device *rdev)
1618 {
1619         const char *chip_name;
1620         const char *new_chip_name;
1621         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1622         size_t smc_req_size, mc2_req_size;
1623         char fw_name[30];
1624         int err;
1625         int new_fw = 0;
1626
1627         DRM_DEBUG("\n");
1628
1629         switch (rdev->family) {
1630         case CHIP_TAHITI:
1631                 chip_name = "TAHITI";
1632                 new_chip_name = "tahiti";
1633                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1634                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1635                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1636                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1637                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1638                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1639                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1640                 break;
1641         case CHIP_PITCAIRN:
1642                 chip_name = "PITCAIRN";
1643                 new_chip_name = "pitcairn";
1644                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1645                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1646                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1647                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1648                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1649                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1650                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1651                 break;
1652         case CHIP_VERDE:
1653                 chip_name = "VERDE";
1654                 new_chip_name = "verde";
1655                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1656                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1657                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1658                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1659                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1660                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1661                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1662                 break;
1663         case CHIP_OLAND:
1664                 chip_name = "OLAND";
1665                 new_chip_name = "oland";
1666                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1667                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1668                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1669                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1670                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1671                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1672                 break;
1673         case CHIP_HAINAN:
1674                 chip_name = "HAINAN";
1675                 new_chip_name = "hainan";
1676                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1677                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1678                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1679                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1680                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1681                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1682                 break;
1683         default: BUG();
1684         }
1685
1686         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1687
1688         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1689         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1690         if (err) {
1691                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1692                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1693                 if (err)
1694                         goto out;
1695                 if (rdev->pfp_fw->size != pfp_req_size) {
1696                         printk(KERN_ERR
1697                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1698                                rdev->pfp_fw->size, fw_name);
1699                         err = -EINVAL;
1700                         goto out;
1701                 }
1702         } else {
1703                 err = radeon_ucode_validate(rdev->pfp_fw);
1704                 if (err) {
1705                         printk(KERN_ERR
1706                                "si_cp: validation failed for firmware \"%s\"\n",
1707                                fw_name);
1708                         goto out;
1709                 } else {
1710                         new_fw++;
1711                 }
1712         }
1713
1714         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1715         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1716         if (err) {
1717                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1718                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1719                 if (err)
1720                         goto out;
1721                 if (rdev->me_fw->size != me_req_size) {
1722                         printk(KERN_ERR
1723                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1724                                rdev->me_fw->size, fw_name);
1725                         err = -EINVAL;
1726                 }
1727         } else {
1728                 err = radeon_ucode_validate(rdev->me_fw);
1729                 if (err) {
1730                         printk(KERN_ERR
1731                                "si_cp: validation failed for firmware \"%s\"\n",
1732                                fw_name);
1733                         goto out;
1734                 } else {
1735                         new_fw++;
1736                 }
1737         }
1738
1739         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1740         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1741         if (err) {
1742                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1743                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1744                 if (err)
1745                         goto out;
1746                 if (rdev->ce_fw->size != ce_req_size) {
1747                         printk(KERN_ERR
1748                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749                                rdev->ce_fw->size, fw_name);
1750                         err = -EINVAL;
1751                 }
1752         } else {
1753                 err = radeon_ucode_validate(rdev->ce_fw);
1754                 if (err) {
1755                         printk(KERN_ERR
1756                                "si_cp: validation failed for firmware \"%s\"\n",
1757                                fw_name);
1758                         goto out;
1759                 } else {
1760                         new_fw++;
1761                 }
1762         }
1763
1764         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1765         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1766         if (err) {
1767                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1768                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1769                 if (err)
1770                         goto out;
1771                 if (rdev->rlc_fw->size != rlc_req_size) {
1772                         printk(KERN_ERR
1773                                "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1774                                rdev->rlc_fw->size, fw_name);
1775                         err = -EINVAL;
1776                 }
1777         } else {
1778                 err = radeon_ucode_validate(rdev->rlc_fw);
1779                 if (err) {
1780                         printk(KERN_ERR
1781                                "si_cp: validation failed for firmware \"%s\"\n",
1782                                fw_name);
1783                         goto out;
1784                 } else {
1785                         new_fw++;
1786                 }
1787         }
1788
1789         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1790         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1791         if (err) {
1792                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1793                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1794                 if (err) {
1795                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1796                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1797                         if (err)
1798                                 goto out;
1799                 }
1800                 if ((rdev->mc_fw->size != mc_req_size) &&
1801                     (rdev->mc_fw->size != mc2_req_size)) {
1802                         printk(KERN_ERR
1803                                "si_mc: Bogus length %zu in firmware \"%s\"\n",
1804                                rdev->mc_fw->size, fw_name);
1805                         err = -EINVAL;
1806                 }
1807                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1808         } else {
1809                 err = radeon_ucode_validate(rdev->mc_fw);
1810                 if (err) {
1811                         printk(KERN_ERR
1812                                "si_cp: validation failed for firmware \"%s\"\n",
1813                                fw_name);
1814                         goto out;
1815                 } else {
1816                         new_fw++;
1817                 }
1818         }
1819
1820         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1821         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1822         if (err) {
1823                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1824                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1825                 if (err) {
1826                         printk(KERN_ERR
1827                                "smc: error loading firmware \"%s\"\n",
1828                                fw_name);
1829                         release_firmware(rdev->smc_fw);
1830                         rdev->smc_fw = NULL;
1831                         err = 0;
1832                 } else if (rdev->smc_fw->size != smc_req_size) {
1833                         printk(KERN_ERR
1834                                "si_smc: Bogus length %zu in firmware \"%s\"\n",
1835                                rdev->smc_fw->size, fw_name);
1836                         err = -EINVAL;
1837                 }
1838         } else {
1839                 err = radeon_ucode_validate(rdev->smc_fw);
1840                 if (err) {
1841                         printk(KERN_ERR
1842                                "si_cp: validation failed for firmware \"%s\"\n",
1843                                fw_name);
1844                         goto out;
1845                 } else {
1846                         new_fw++;
1847                 }
1848         }
1849
1850         if (new_fw == 0) {
1851                 rdev->new_fw = false;
1852         } else if (new_fw < 6) {
1853                 printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1854                 err = -EINVAL;
1855         } else {
1856                 rdev->new_fw = true;
1857         }
1858 out:
1859         if (err) {
1860                 if (err != -EINVAL)
1861                         printk(KERN_ERR
1862                                "si_cp: Failed to load firmware \"%s\"\n",
1863                                fw_name);
1864                 release_firmware(rdev->pfp_fw);
1865                 rdev->pfp_fw = NULL;
1866                 release_firmware(rdev->me_fw);
1867                 rdev->me_fw = NULL;
1868                 release_firmware(rdev->ce_fw);
1869                 rdev->ce_fw = NULL;
1870                 release_firmware(rdev->rlc_fw);
1871                 rdev->rlc_fw = NULL;
1872                 release_firmware(rdev->mc_fw);
1873                 rdev->mc_fw = NULL;
1874                 release_firmware(rdev->smc_fw);
1875                 rdev->smc_fw = NULL;
1876         }
1877         return err;
1878 }
1879
1880 /* watermark setup */
1881 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1882                                    struct radeon_crtc *radeon_crtc,
1883                                    struct drm_display_mode *mode,
1884                                    struct drm_display_mode *other_mode)
1885 {
1886         u32 tmp, buffer_alloc, i;
1887         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1888         /*
1889          * Line Buffer Setup
1890          * There are 3 line buffers, each one shared by 2 display controllers.
1891          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1892          * the display controllers.  The partitioning is done via one of four
1893          * preset allocations specified in bits 21:20:
1894          *  0 - half lb
1895          *  2 - whole lb, other crtc must be disabled
1896          */
1897         /* this can get tricky if we have two large displays on a paired group
1898          * of crtcs.  Ideally for multiple large displays we'd assign them to
1899          * non-linked crtcs for maximum line buffer allocation.
1900          */
1901         if (radeon_crtc->base.enabled && mode) {
1902                 if (other_mode) {
1903                         tmp = 0; /* 1/2 */
1904                         buffer_alloc = 1;
1905                 } else {
1906                         tmp = 2; /* whole */
1907                         buffer_alloc = 2;
1908                 }
1909         } else {
1910                 tmp = 0;
1911                 buffer_alloc = 0;
1912         }
1913
1914         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1915                DC_LB_MEMORY_CONFIG(tmp));
1916
1917         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1918                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1919         for (i = 0; i < rdev->usec_timeout; i++) {
1920                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1921                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1922                         break;
1923                 udelay(1);
1924         }
1925
1926         if (radeon_crtc->base.enabled && mode) {
1927                 switch (tmp) {
1928                 case 0:
1929                 default:
1930                         return 4096 * 2;
1931                 case 2:
1932                         return 8192 * 2;
1933                 }
1934         }
1935
1936         /* controller not enabled, so no lb used */
1937         return 0;
1938 }
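/*
 * Illustrative note (not from the original source): the value returned here
 * feeds dce6_program_watermarks() as lb_size.  With both controllers of a
 * pair active, each gets the half-LB allocation and 4096 * 2 = 8192 is
 * returned; a controller whose partner is idle gets the whole buffer and
 * 8192 * 2 = 16384; a disabled controller contributes 0.
 */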
1939
1940 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1941 {
1942         u32 tmp = RREG32(MC_SHARED_CHMAP);
1943
1944         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1945         case 0:
1946         default:
1947                 return 1;
1948         case 1:
1949                 return 2;
1950         case 2:
1951                 return 4;
1952         case 3:
1953                 return 8;
1954         case 4:
1955                 return 3;
1956         case 5:
1957                 return 6;
1958         case 6:
1959                 return 10;
1960         case 7:
1961                 return 12;
1962         case 8:
1963                 return 16;
1964         }
1965 }
1966
1967 struct dce6_wm_params {
1968         u32 dram_channels; /* number of dram channels */
1969         u32 yclk;          /* bandwidth per dram data pin in kHz */
1970         u32 sclk;          /* engine clock in kHz */
1971         u32 disp_clk;      /* display clock in kHz */
1972         u32 src_width;     /* viewport width */
1973         u32 active_time;   /* active display time in ns */
1974         u32 blank_time;    /* blank time in ns */
1975         bool interlaced;    /* mode is interlaced */
1976         fixed20_12 vsc;    /* vertical scale ratio */
1977         u32 num_heads;     /* number of active crtcs */
1978         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1979         u32 lb_size;       /* line buffer allocated to pipe */
1980         u32 vtaps;         /* vertical scaler taps */
1981 };
1982
1983 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1984 {
1985         /* Calculate raw DRAM Bandwidth */
1986         fixed20_12 dram_efficiency; /* 0.7 */
1987         fixed20_12 yclk, dram_channels, bandwidth;
1988         fixed20_12 a;
1989
1990         a.full = dfixed_const(1000);
1991         yclk.full = dfixed_const(wm->yclk);
1992         yclk.full = dfixed_div(yclk, a);
1993         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1994         a.full = dfixed_const(10);
1995         dram_efficiency.full = dfixed_const(7);
1996         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1997         bandwidth.full = dfixed_mul(dram_channels, yclk);
1998         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1999
2000         return dfixed_trunc(bandwidth);
2001 }
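/*
 * Worked example (illustrative numbers, not from the original source): the
 * fixed-point math above evaluates to
 *     (yclk / 1000) * (dram_channels * 4) * 0.7
 * so yclk = 1000000 kHz with 2 channels gives 1000 * 8 * 0.7 = 5600.
 */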
2002
2003 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2004 {
2005         /* Calculate DRAM Bandwidth and the part allocated to display. */
2006         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2007         fixed20_12 yclk, dram_channels, bandwidth;
2008         fixed20_12 a;
2009
2010         a.full = dfixed_const(1000);
2011         yclk.full = dfixed_const(wm->yclk);
2012         yclk.full = dfixed_div(yclk, a);
2013         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2014         a.full = dfixed_const(10);
2015         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2016         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2017         bandwidth.full = dfixed_mul(dram_channels, yclk);
2018         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2019
2020         return dfixed_trunc(bandwidth);
2021 }
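/*
 * Worked example (illustrative numbers): same formula as above, but with the
 * worst-case 0.3 display allocation instead of the 0.7 DRAM efficiency, so
 * yclk = 1000000 kHz with 2 channels gives 1000 * 8 * 0.3 = 2400.
 */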
2022
2023 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2024 {
2025         /* Calculate the display Data return Bandwidth */
2026         fixed20_12 return_efficiency; /* 0.8 */
2027         fixed20_12 sclk, bandwidth;
2028         fixed20_12 a;
2029
2030         a.full = dfixed_const(1000);
2031         sclk.full = dfixed_const(wm->sclk);
2032         sclk.full = dfixed_div(sclk, a);
2033         a.full = dfixed_const(10);
2034         return_efficiency.full = dfixed_const(8);
2035         return_efficiency.full = dfixed_div(return_efficiency, a);
2036         a.full = dfixed_const(32);
2037         bandwidth.full = dfixed_mul(a, sclk);
2038         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2039
2040         return dfixed_trunc(bandwidth);
2041 }
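/*
 * Worked example (illustrative numbers): the result is
 *     32 * (sclk / 1000) * 0.8
 * so an engine clock of sclk = 800000 kHz yields 32 * 800 * 0.8 = 20480.
 */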
2042
2043 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2044 {
2045         return 32;
2046 }
2047
2048 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2049 {
2050         /* Calculate the DMIF Request Bandwidth */
2051         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2052         fixed20_12 disp_clk, sclk, bandwidth;
2053         fixed20_12 a, b1, b2;
2054         u32 min_bandwidth;
2055
2056         a.full = dfixed_const(1000);
2057         disp_clk.full = dfixed_const(wm->disp_clk);
2058         disp_clk.full = dfixed_div(disp_clk, a);
2059         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2060         b1.full = dfixed_mul(a, disp_clk);
2061
2062         a.full = dfixed_const(1000);
2063         sclk.full = dfixed_const(wm->sclk);
2064         sclk.full = dfixed_div(sclk, a);
2065         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2066         b2.full = dfixed_mul(a, sclk);
2067
2068         a.full = dfixed_const(10);
2069         disp_clk_request_efficiency.full = dfixed_const(8);
2070         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2071
2072         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2073
2074         a.full = dfixed_const(min_bandwidth);
2075         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2076
2077         return dfixed_trunc(bandwidth);
2078 }
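/*
 * Worked example (illustrative numbers): the result is
 *     min(16 * (disp_clk / 1000), 32 * (sclk / 1000)) * 0.8
 * e.g. disp_clk = 148500 kHz and sclk = 800000 kHz gives
 * min(2376, 25600) * 0.8 = 1900 (after truncation).
 */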
2079
2080 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2081 {
2082         /* Calculate the Available bandwidth. Display can use this temporarily but not sustain it on average. */
2083         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2084         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2085         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2086
2087         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2088 }
2089
2090 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2091 {
2092         /* Calculate the display mode Average Bandwidth.
2093          * DisplayMode should contain the source and destination dimensions,
2094          * timing, etc.
2095          */
2096         fixed20_12 bpp;
2097         fixed20_12 line_time;
2098         fixed20_12 src_width;
2099         fixed20_12 bandwidth;
2100         fixed20_12 a;
2101
2102         a.full = dfixed_const(1000);
2103         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2104         line_time.full = dfixed_div(line_time, a);
2105         bpp.full = dfixed_const(wm->bytes_per_pixel);
2106         src_width.full = dfixed_const(wm->src_width);
2107         bandwidth.full = dfixed_mul(src_width, bpp);
2108         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2109         bandwidth.full = dfixed_div(bandwidth, line_time);
2110
2111         return dfixed_trunc(bandwidth);
2112 }
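/*
 * Worked example (illustrative numbers): with line_time expressed in us the
 * result is src_width * bytes_per_pixel * vsc / line_time.  For a 1920-wide
 * source at 4 bytes per pixel, vsc = 1 and a 13.2 us line time, this is
 * 1920 * 4 / 13.2, i.e. roughly 581.
 */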
2113
2114 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2115 {
2116         /* First calculate the latency in ns */
2117         u32 mc_latency = 2000; /* 2000 ns. */
2118         u32 available_bandwidth = dce6_available_bandwidth(wm);
2119         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2120         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2121         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2122         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2123                 (wm->num_heads * cursor_line_pair_return_time);
2124         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2125         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2126         u32 tmp, dmif_size = 12288;
2127         fixed20_12 a, b, c;
2128
2129         if (wm->num_heads == 0)
2130                 return 0;
2131
2132         a.full = dfixed_const(2);
2133         b.full = dfixed_const(1);
2134         if ((wm->vsc.full > a.full) ||
2135             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2136             (wm->vtaps >= 5) ||
2137             ((wm->vsc.full >= a.full) && wm->interlaced))
2138                 max_src_lines_per_dst_line = 4;
2139         else
2140                 max_src_lines_per_dst_line = 2;
2141
2142         a.full = dfixed_const(available_bandwidth);
2143         b.full = dfixed_const(wm->num_heads);
2144         a.full = dfixed_div(a, b);
2145
2146         b.full = dfixed_const(mc_latency + 512);
2147         c.full = dfixed_const(wm->disp_clk);
2148         b.full = dfixed_div(b, c);
2149
2150         c.full = dfixed_const(dmif_size);
2151         b.full = dfixed_div(c, b);
2152
2153         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2154
2155         b.full = dfixed_const(1000);
2156         c.full = dfixed_const(wm->disp_clk);
2157         b.full = dfixed_div(c, b);
2158         c.full = dfixed_const(wm->bytes_per_pixel);
2159         b.full = dfixed_mul(b, c);
2160
2161         lb_fill_bw = min(tmp, dfixed_trunc(b));
2162
2163         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2164         b.full = dfixed_const(1000);
2165         c.full = dfixed_const(lb_fill_bw);
2166         b.full = dfixed_div(c, b);
2167         a.full = dfixed_div(a, b);
2168         line_fill_time = dfixed_trunc(a);
2169
2170         if (line_fill_time < wm->active_time)
2171                 return latency;
2172         else
2173                 return latency + (line_fill_time - wm->active_time);
2174
2175 }
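/*
 * Illustrative summary (not from the original source): the watermark is the
 * latency, in ns, that the line buffer must hide: the 2000 ns MC latency,
 * the DC pipe latency (40000000 / disp_clk, about 269 ns at 148500 kHz), and
 * the time for the other heads' worst-case chunk (512 * 8 bytes) and cursor
 * line-pair (128 * 4 bytes) returns at the available bandwidth.  If the line
 * buffer cannot be refilled within the active time, the shortfall is added.
 */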
2176
2177 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2178 {
2179         if (dce6_average_bandwidth(wm) <=
2180             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2181                 return true;
2182         else
2183                 return false;
2184 }
2185
2186 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2187 {
2188         if (dce6_average_bandwidth(wm) <=
2189             (dce6_available_bandwidth(wm) / wm->num_heads))
2190                 return true;
2191         else
2192                 return false;
2193 }
2194
2195 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2196 {
2197         u32 lb_partitions = wm->lb_size / wm->src_width;
2198         u32 line_time = wm->active_time + wm->blank_time;
2199         u32 latency_tolerant_lines;
2200         u32 latency_hiding;
2201         fixed20_12 a;
2202
2203         a.full = dfixed_const(1);
2204         if (wm->vsc.full > a.full)
2205                 latency_tolerant_lines = 1;
2206         else {
2207                 if (lb_partitions <= (wm->vtaps + 1))
2208                         latency_tolerant_lines = 1;
2209                 else
2210                         latency_tolerant_lines = 2;
2211         }
2212
2213         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2214
2215         if (dce6_latency_watermark(wm) <= latency_hiding)
2216                 return true;
2217         else
2218                 return false;
2219 }
2220
2221 static void dce6_program_watermarks(struct radeon_device *rdev,
2222                                          struct radeon_crtc *radeon_crtc,
2223                                          u32 lb_size, u32 num_heads)
2224 {
2225         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2226         struct dce6_wm_params wm_low, wm_high;
2227         u32 dram_channels;
2228         u32 pixel_period;
2229         u32 line_time = 0;
2230         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2231         u32 priority_a_mark = 0, priority_b_mark = 0;
2232         u32 priority_a_cnt = PRIORITY_OFF;
2233         u32 priority_b_cnt = PRIORITY_OFF;
2234         u32 tmp, arb_control3;
2235         fixed20_12 a, b, c;
2236
2237         if (radeon_crtc->base.enabled && num_heads && mode) {
2238                 pixel_period = 1000000 / (u32)mode->clock;
2239                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2240                 priority_a_cnt = 0;
2241                 priority_b_cnt = 0;
2242
2243                 if (rdev->family == CHIP_ARUBA)
2244                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2245                 else
2246                         dram_channels = si_get_number_of_dram_channels(rdev);
2247
2248                 /* watermark for high clocks */
2249                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2250                         wm_high.yclk =
2251                                 radeon_dpm_get_mclk(rdev, false) * 10;
2252                         wm_high.sclk =
2253                                 radeon_dpm_get_sclk(rdev, false) * 10;
2254                 } else {
2255                         wm_high.yclk = rdev->pm.current_mclk * 10;
2256                         wm_high.sclk = rdev->pm.current_sclk * 10;
2257                 }
2258
2259                 wm_high.disp_clk = mode->clock;
2260                 wm_high.src_width = mode->crtc_hdisplay;
2261                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2262                 wm_high.blank_time = line_time - wm_high.active_time;
2263                 wm_high.interlaced = false;
2264                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2265                         wm_high.interlaced = true;
2266                 wm_high.vsc = radeon_crtc->vsc;
2267                 wm_high.vtaps = 1;
2268                 if (radeon_crtc->rmx_type != RMX_OFF)
2269                         wm_high.vtaps = 2;
2270                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2271                 wm_high.lb_size = lb_size;
2272                 wm_high.dram_channels = dram_channels;
2273                 wm_high.num_heads = num_heads;
2274
2275                 /* watermark for low clocks */
2276                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2277                         wm_low.yclk =
2278                                 radeon_dpm_get_mclk(rdev, true) * 10;
2279                         wm_low.sclk =
2280                                 radeon_dpm_get_sclk(rdev, true) * 10;
2281                 } else {
2282                         wm_low.yclk = rdev->pm.current_mclk * 10;
2283                         wm_low.sclk = rdev->pm.current_sclk * 10;
2284                 }
2285
2286                 wm_low.disp_clk = mode->clock;
2287                 wm_low.src_width = mode->crtc_hdisplay;
2288                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2289                 wm_low.blank_time = line_time - wm_low.active_time;
2290                 wm_low.interlaced = false;
2291                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2292                         wm_low.interlaced = true;
2293                 wm_low.vsc = radeon_crtc->vsc;
2294                 wm_low.vtaps = 1;
2295                 if (radeon_crtc->rmx_type != RMX_OFF)
2296                         wm_low.vtaps = 2;
2297                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2298                 wm_low.lb_size = lb_size;
2299                 wm_low.dram_channels = dram_channels;
2300                 wm_low.num_heads = num_heads;
2301
2302                 /* set for high clocks */
2303                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2304                 /* set for low clocks */
2305                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2306
2307                 /* possibly force display priority to high */
2308                 /* should really do this at mode validation time... */
2309                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2310                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2311                     !dce6_check_latency_hiding(&wm_high) ||
2312                     (rdev->disp_priority == 2)) {
2313                         DRM_DEBUG_KMS("force priority to high\n");
2314                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2315                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2316                 }
2317                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2318                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2319                     !dce6_check_latency_hiding(&wm_low) ||
2320                     (rdev->disp_priority == 2)) {
2321                         DRM_DEBUG_KMS("force priority to high\n");
2322                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2323                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2324                 }
2325
2326                 a.full = dfixed_const(1000);
2327                 b.full = dfixed_const(mode->clock);
2328                 b.full = dfixed_div(b, a);
2329                 c.full = dfixed_const(latency_watermark_a);
2330                 c.full = dfixed_mul(c, b);
2331                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2332                 c.full = dfixed_div(c, a);
2333                 a.full = dfixed_const(16);
2334                 c.full = dfixed_div(c, a);
2335                 priority_a_mark = dfixed_trunc(c);
2336                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2337
2338                 a.full = dfixed_const(1000);
2339                 b.full = dfixed_const(mode->clock);
2340                 b.full = dfixed_div(b, a);
2341                 c.full = dfixed_const(latency_watermark_b);
2342                 c.full = dfixed_mul(c, b);
2343                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2344                 c.full = dfixed_div(c, a);
2345                 a.full = dfixed_const(16);
2346                 c.full = dfixed_div(c, a);
2347                 priority_b_mark = dfixed_trunc(c);
2348                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
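                /*
                 * Illustrative note (not from the original source): each mark
                 * works out to latency(ns) * pixel_clock(MHz) * hsc / 1000 / 16,
                 * i.e. roughly the number of 16-pixel groups scanned out while
                 * the corresponding latency watermark elapses.
                 */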
2349         }
2350
2351         /* select wm A */
2352         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2353         tmp = arb_control3;
2354         tmp &= ~LATENCY_WATERMARK_MASK(3);
2355         tmp |= LATENCY_WATERMARK_MASK(1);
2356         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2357         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2358                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2359                 LATENCY_HIGH_WATERMARK(line_time)));
2360         /* select wm B */
2361         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2362         tmp &= ~LATENCY_WATERMARK_MASK(3);
2363         tmp |= LATENCY_WATERMARK_MASK(2);
2364         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2365         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2366                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2367                 LATENCY_HIGH_WATERMARK(line_time)));
2368         /* restore original selection */
2369         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2370
2371         /* write the priority marks */
2372         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2373         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2374
2375         /* save values for DPM */
2376         radeon_crtc->line_time = line_time;
2377         radeon_crtc->wm_high = latency_watermark_a;
2378         radeon_crtc->wm_low = latency_watermark_b;
2379 }
2380
2381 void dce6_bandwidth_update(struct radeon_device *rdev)
2382 {
2383         struct drm_display_mode *mode0 = NULL;
2384         struct drm_display_mode *mode1 = NULL;
2385         u32 num_heads = 0, lb_size;
2386         int i;
2387
2388         if (!rdev->mode_info.mode_config_initialized)
2389                 return;
2390
2391         radeon_update_display_priority(rdev);
2392
2393         for (i = 0; i < rdev->num_crtc; i++) {
2394                 if (rdev->mode_info.crtcs[i]->base.enabled)
2395                         num_heads++;
2396         }
2397         for (i = 0; i < rdev->num_crtc; i += 2) {
2398                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2399                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2400                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2401                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2402                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2403                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2404         }
2405 }
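/*
 * Illustrative note (not from the original source): the crtcs are walked in
 * pairs (i, i + 1) because each line buffer is shared by two display
 * controllers, so each controller's lb_size depends on whether its partner
 * also has an active mode (see dce6_line_buffer_adjust() above).
 */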
2406
2407 /*
2408  * Core functions
2409  */
2410 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2411 {
2412         const u32 num_tile_mode_states = 32;
2413         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2414
2415         switch (rdev->config.si.mem_row_size_in_kb) {
2416         case 1:
2417                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2418                 break;
2419         case 2:
2420         default:
2421                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2422                 break;
2423         case 4:
2424                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2425                 break;
2426         }
2427
2428         if ((rdev->family == CHIP_TAHITI) ||
2429             (rdev->family == CHIP_PITCAIRN)) {
2430                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2431                         switch (reg_offset) {
2432                         case 0:  /* non-AA compressed depth or any compressed stencil */
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2437                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2438                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2441                                 break;
2442                         case 1:  /* 2xAA/4xAA compressed depth only */
2443                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2445                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2448                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451                                 break;
2452                         case 2:  /* 8xAA compressed depth only */
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2458                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461                                 break;
2462                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2467                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2468                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471                                 break;
2472                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2473                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2475                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2477                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2478                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481                                 break;
2482                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2483                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486                                                  TILE_SPLIT(split_equal_to_row_size) |
2487                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2488                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491                                 break;
2492                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2493                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2495                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496                                                  TILE_SPLIT(split_equal_to_row_size) |
2497                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2498                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2501                                 break;
2502                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2503                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506                                                  TILE_SPLIT(split_equal_to_row_size) |
2507                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2508                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511                                 break;
2512                         case 8:  /* 1D and 1D Array Surfaces */
2513                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2514                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2518                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521                                 break;
2522                         case 9:  /* Displayable maps. */
2523                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2527                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2528                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531                                 break;
2532                         case 10:  /* Display 8bpp. */
2533                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2538                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541                                 break;
2542                         case 11:  /* Display 16bpp. */
2543                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2547                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2548                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551                                 break;
2552                         case 12:  /* Display 32bpp. */
2553                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2557                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2558                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2560                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2561                                 break;
2562                         case 13:  /* Thin. */
2563                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2565                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2567                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2568                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571                                 break;
2572                         case 14:  /* Thin 8 bpp. */
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2578                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2581                                 break;
2582                         case 15:  /* Thin 16 bpp. */
2583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2585                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2587                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2588                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2591                                 break;
2592                         case 16:  /* Thin 32 bpp. */
2593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2598                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601                                 break;
2602                         case 17:  /* Thin 64 bpp. */
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606                                                  TILE_SPLIT(split_equal_to_row_size) |
2607                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2608                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2611                                 break;
2612                         case 21:  /* 8 bpp PRT. */
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2618                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2619                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621                                 break;
2622                         case 22:  /* 16 bpp PRT */
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2627                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2628                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2630                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2631                                 break;
2632                         case 23:  /* 32 bpp PRT */
2633                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2637                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2638                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641                                 break;
2642                         case 24:  /* 64 bpp PRT */
2643                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2646                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2648                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651                                 break;
2652                         case 25:  /* 128 bpp PRT */
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2657                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2658                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661                                 break;
2662                         default:
2663                                 gb_tile_moden = 0;
2664                                 break;
2665                         }
2666                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2667                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2668                 }
2669         } else if ((rdev->family == CHIP_VERDE) ||
2670                    (rdev->family == CHIP_OLAND) ||
2671                    (rdev->family == CHIP_HAINAN)) {
2672                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2673                         switch (reg_offset) {
2674                         case 0:  /* non-AA compressed depth or any compressed stencil */
2675                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2677                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2679                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2680                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683                                 break;
2684                         case 1:  /* 2xAA/4xAA compressed depth only */
2685                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2687                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2690                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2693                                 break;
2694                         case 2:  /* 8xAA compressed depth only */
2695                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2700                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703                                 break;
2704                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2709                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2710                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713                                 break;
2714                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2715                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2717                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2719                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2720                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723                                 break;
2724                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2725                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728                                                  TILE_SPLIT(split_equal_to_row_size) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2730                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2732                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2733                                 break;
2734                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2735                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738                                                  TILE_SPLIT(split_equal_to_row_size) |
2739                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2740                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2743                                 break;
2744                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2745                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                                                  TILE_SPLIT(split_equal_to_row_size) |
2749                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2750                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753                                 break;
2754                         case 8:  /* 1D and 1D Array Surfaces */
2755                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2757                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2760                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2762                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2763                                 break;
2764                         case 9:  /* Displayable maps. */
2765                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2766                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2767                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2769                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2770                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773                                 break;
2774                         case 10:  /* Display 8bpp. */
2775                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2777                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2780                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2783                                 break;
2784                         case 11:  /* Display 16bpp. */
2785                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2790                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793                                 break;
2794                         case 12:  /* Display 32bpp. */
2795                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2800                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2802                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803                                 break;
2804                         case 13:  /* Thin. */
2805                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2807                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2810                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813                                 break;
2814                         case 14:  /* Thin 8 bpp. */
2815                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2817                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2819                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2820                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2822                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823                                 break;
2824                         case 15:  /* Thin 16 bpp. */
2825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2827                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2830                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2832                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833                                 break;
2834                         case 16:  /* Thin 32 bpp. */
2835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2839                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2840                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843                                 break;
2844                         case 17:  /* Thin 64 bpp. */
2845                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848                                                  TILE_SPLIT(split_equal_to_row_size) |
2849                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2850                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853                                 break;
2854                         case 21:  /* 8 bpp PRT. */
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2858                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2860                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863                                 break;
2864                         case 22:  /* 16 bpp PRT */
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2868                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2870                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2872                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2873                                 break;
2874                         case 23:  /* 32 bpp PRT */
2875                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2878                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2879                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2880                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883                                 break;
2884                         case 24:  /* 64 bpp PRT */
2885                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2887                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2888                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2889                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2890                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2893                                 break;
2894                         case 25:  /* 128 bpp PRT */
2895                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2899                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2900                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2903                                 break;
2904                         default:
2905                                 gb_tile_moden = 0;
2906                                 break;
2907                         }
2908                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2909                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2910                 }
2911         } else
2912                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2913 }
2914
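/**
 * si_select_se_sh - select which SE, SH combinations to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address (0xffffffff for broadcast)
 * @sh_num: sh block to address (0xffffffff for broadcast)
 *
 * Programs GRBM_GFX_INDEX so that following register accesses target the
 * selected SE/SH pair, or are broadcast when 0xffffffff is passed.
 */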
2915 static void si_select_se_sh(struct radeon_device *rdev,
2916                             u32 se_num, u32 sh_num)
2917 {
2918         u32 data = INSTANCE_BROADCAST_WRITES;
2919
2920         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2921                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2922         else if (se_num == 0xffffffff)
2923                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2924         else if (sh_num == 0xffffffff)
2925                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2926         else
2927                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2928         WREG32(GRBM_GFX_INDEX, data);
2929 }
2930
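/**
 * si_create_bitmask - build a contiguous bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns a mask with the lowest @bit_width bits set.
 */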
2931 static u32 si_create_bitmask(u32 bit_width)
2932 {
2933         u32 i, mask = 0;
2934
2935         for (i = 0; i < bit_width; i++) {
2936                 mask <<= 1;
2937                 mask |= 1;
2938         }
2939         return mask;
2940 }
2941
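/**
 * si_get_cu_enabled - get the bitmask of active CUs
 *
 * @rdev: radeon_device pointer
 * @cu_per_sh: maximum number of CUs per shader array
 *
 * Combines the harvest config (CC_GC_SHADER_ARRAY_CONFIG) with the user
 * config (GC_USER_SHADER_ARRAY_CONFIG) and returns a bitmask of the CUs
 * that are active in the currently selected shader array.
 */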
2942 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2943 {
2944         u32 data, mask;
2945
2946         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2947         if (data & 1)
2948                 data &= INACTIVE_CUS_MASK;
2949         else
2950                 data = 0;
2951         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2952
2953         data >>= INACTIVE_CUS_SHIFT;
2954
2955         mask = si_create_bitmask(cu_per_sh);
2956
2957         return ~data & mask;
2958 }
2959
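/**
 * si_setup_spi - configure SPI static thread management
 *
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of SH blocks per shader engine
 * @cu_per_sh: maximum number of CUs per shader array
 *
 * For each SE/SH pair, clears the bit of the first active CU found in
 * SPI_STATIC_THREAD_MGMT_3, then restores broadcast mode.
 */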
2960 static void si_setup_spi(struct radeon_device *rdev,
2961                          u32 se_num, u32 sh_per_se,
2962                          u32 cu_per_sh)
2963 {
2964         int i, j, k;
2965         u32 data, mask, active_cu;
2966
2967         for (i = 0; i < se_num; i++) {
2968                 for (j = 0; j < sh_per_se; j++) {
2969                         si_select_se_sh(rdev, i, j);
2970                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2971                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2972
2973                         mask = 1;
2974                         for (k = 0; k < 16; k++) {
2975                                 mask <<= k;
2976                                 if (active_cu & mask) {
2977                                         data &= ~mask;
2978                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2979                                         break;
2980                                 }
2981                         }
2982                 }
2983         }
2984         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2985 }
2986
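/**
 * si_get_rb_disabled - get the bitmask of disabled render backends
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: maximum number of RBs per shader engine
 * @sh_per_se: number of SH blocks per shader engine
 *
 * Combines the harvest config (CC_RB_BACKEND_DISABLE) with the user
 * config (GC_USER_RB_BACKEND_DISABLE) and returns a bitmask of the RBs
 * disabled in the currently selected shader array.
 */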
2987 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2988                               u32 max_rb_num_per_se,
2989                               u32 sh_per_se)
2990 {
2991         u32 data, mask;
2992
2993         data = RREG32(CC_RB_BACKEND_DISABLE);
2994         if (data & 1)
2995                 data &= BACKEND_DISABLE_MASK;
2996         else
2997                 data = 0;
2998         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2999
3000         data >>= BACKEND_DISABLE_SHIFT;
3001
3002         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3003
3004         return data & mask;
3005 }
3006
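/**
 * si_setup_rb - set up the render backends
 *
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of SH blocks per shader engine
 * @max_rb_num_per_se: maximum number of RBs per shader engine
 *
 * Gathers the disabled RB mask for every SE/SH pair, stores the enabled
 * set in rdev->config.si.backend_enable_mask and programs
 * PA_SC_RASTER_CONFIG for each shader engine.
 */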
3007 static void si_setup_rb(struct radeon_device *rdev,
3008                         u32 se_num, u32 sh_per_se,
3009                         u32 max_rb_num_per_se)
3010 {
3011         int i, j;
3012         u32 data, mask;
3013         u32 disabled_rbs = 0;
3014         u32 enabled_rbs = 0;
3015
3016         for (i = 0; i < se_num; i++) {
3017                 for (j = 0; j < sh_per_se; j++) {
3018                         si_select_se_sh(rdev, i, j);
3019                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3020                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3021                 }
3022         }
3023         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3024
3025         mask = 1;
3026         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3027                 if (!(disabled_rbs & mask))
3028                         enabled_rbs |= mask;
3029                 mask <<= 1;
3030         }
3031
3032         rdev->config.si.backend_enable_mask = enabled_rbs;
3033
3034         for (i = 0; i < se_num; i++) {
3035                 si_select_se_sh(rdev, i, 0xffffffff);
3036                 data = 0;
3037                 for (j = 0; j < sh_per_se; j++) {
3038                         switch (enabled_rbs & 3) {
3039                         case 1:
3040                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3041                                 break;
3042                         case 2:
3043                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3044                                 break;
3045                         case 3:
3046                         default:
3047                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3048                                 break;
3049                         }
3050                         enabled_rbs >>= 2;
3051                 }
3052                 WREG32(PA_SC_RASTER_CONFIG, data);
3053         }
3054         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055 }
3056
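/**
 * si_gpu_init - set up the gfx core
 *
 * @rdev: radeon_device pointer
 *
 * Configures per-asic limits, HDP, tiling (GB_ADDR_CONFIG and the tile
 * mode table), render backends, SPI and the remaining 3D engine
 * hardware defaults.
 */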
3057 static void si_gpu_init(struct radeon_device *rdev)
3058 {
3059         u32 gb_addr_config = 0;
3060         u32 mc_shared_chmap, mc_arb_ramcfg;
3061         u32 sx_debug_1;
3062         u32 hdp_host_path_cntl;
3063         u32 tmp;
3064         int i, j;
3065
3066         switch (rdev->family) {
3067         case CHIP_TAHITI:
3068                 rdev->config.si.max_shader_engines = 2;
3069                 rdev->config.si.max_tile_pipes = 12;
3070                 rdev->config.si.max_cu_per_sh = 8;
3071                 rdev->config.si.max_sh_per_se = 2;
3072                 rdev->config.si.max_backends_per_se = 4;
3073                 rdev->config.si.max_texture_channel_caches = 12;
3074                 rdev->config.si.max_gprs = 256;
3075                 rdev->config.si.max_gs_threads = 32;
3076                 rdev->config.si.max_hw_contexts = 8;
3077
3078                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3079                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3080                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3081                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3082                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3083                 break;
3084         case CHIP_PITCAIRN:
3085                 rdev->config.si.max_shader_engines = 2;
3086                 rdev->config.si.max_tile_pipes = 8;
3087                 rdev->config.si.max_cu_per_sh = 5;
3088                 rdev->config.si.max_sh_per_se = 2;
3089                 rdev->config.si.max_backends_per_se = 4;
3090                 rdev->config.si.max_texture_channel_caches = 8;
3091                 rdev->config.si.max_gprs = 256;
3092                 rdev->config.si.max_gs_threads = 32;
3093                 rdev->config.si.max_hw_contexts = 8;
3094
3095                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3096                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3097                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3098                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3099                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3100                 break;
3101         case CHIP_VERDE:
3102         default:
3103                 rdev->config.si.max_shader_engines = 1;
3104                 rdev->config.si.max_tile_pipes = 4;
3105                 rdev->config.si.max_cu_per_sh = 5;
3106                 rdev->config.si.max_sh_per_se = 2;
3107                 rdev->config.si.max_backends_per_se = 4;
3108                 rdev->config.si.max_texture_channel_caches = 4;
3109                 rdev->config.si.max_gprs = 256;
3110                 rdev->config.si.max_gs_threads = 32;
3111                 rdev->config.si.max_hw_contexts = 8;
3112
3113                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3114                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3115                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3116                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3117                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3118                 break;
3119         case CHIP_OLAND:
3120                 rdev->config.si.max_shader_engines = 1;
3121                 rdev->config.si.max_tile_pipes = 4;
3122                 rdev->config.si.max_cu_per_sh = 6;
3123                 rdev->config.si.max_sh_per_se = 1;
3124                 rdev->config.si.max_backends_per_se = 2;
3125                 rdev->config.si.max_texture_channel_caches = 4;
3126                 rdev->config.si.max_gprs = 256;
3127                 rdev->config.si.max_gs_threads = 16;
3128                 rdev->config.si.max_hw_contexts = 8;
3129
3130                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3131                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3132                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3133                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3134                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3135                 break;
3136         case CHIP_HAINAN:
3137                 rdev->config.si.max_shader_engines = 1;
3138                 rdev->config.si.max_tile_pipes = 4;
3139                 rdev->config.si.max_cu_per_sh = 5;
3140                 rdev->config.si.max_sh_per_se = 1;
3141                 rdev->config.si.max_backends_per_se = 1;
3142                 rdev->config.si.max_texture_channel_caches = 2;
3143                 rdev->config.si.max_gprs = 256;
3144                 rdev->config.si.max_gs_threads = 16;
3145                 rdev->config.si.max_hw_contexts = 8;
3146
3147                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3148                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3149                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3150                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3151                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3152                 break;
3153         }
3154
3155         /* Initialize HDP */
3156         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3157                 WREG32((0x2c14 + j), 0x00000000);
3158                 WREG32((0x2c18 + j), 0x00000000);
3159                 WREG32((0x2c1c + j), 0x00000000);
3160                 WREG32((0x2c20 + j), 0x00000000);
3161                 WREG32((0x2c24 + j), 0x00000000);
3162         }
3163
3164         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3165         WREG32(SRBM_INT_CNTL, 1);
3166         WREG32(SRBM_INT_ACK, 1);
3167
3168         evergreen_fix_pci_max_read_req_size(rdev);
3169
3170         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3171
3172         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3173         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3174
3175         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3176         rdev->config.si.mem_max_burst_length_bytes = 256;
3177         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3178         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3179         if (rdev->config.si.mem_row_size_in_kb > 4)
3180                 rdev->config.si.mem_row_size_in_kb = 4;
3181         /* XXX use MC settings? */
3182         rdev->config.si.shader_engine_tile_size = 32;
3183         rdev->config.si.num_gpus = 1;
3184         rdev->config.si.multi_gpu_tile_size = 64;
3185
3186         /* fix up row size */
3187         gb_addr_config &= ~ROW_SIZE_MASK;
3188         switch (rdev->config.si.mem_row_size_in_kb) {
3189         case 1:
3190         default:
3191                 gb_addr_config |= ROW_SIZE(0);
3192                 break;
3193         case 2:
3194                 gb_addr_config |= ROW_SIZE(1);
3195                 break;
3196         case 4:
3197                 gb_addr_config |= ROW_SIZE(2);
3198                 break;
3199         }
3200
3201         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3202          * not have bank info, so create a custom tiling dword.
3203          * bits 3:0   num_pipes
3204          * bits 7:4   num_banks
3205          * bits 11:8  group_size
3206          * bits 15:12 row_size
3207          */
3208         rdev->config.si.tile_config = 0;
3209         switch (rdev->config.si.num_tile_pipes) {
3210         case 1:
3211                 rdev->config.si.tile_config |= (0 << 0);
3212                 break;
3213         case 2:
3214                 rdev->config.si.tile_config |= (1 << 0);
3215                 break;
3216         case 4:
3217                 rdev->config.si.tile_config |= (2 << 0);
3218                 break;
3219         case 8:
3220         default:
3221                 /* XXX what about 12? */
3222                 rdev->config.si.tile_config |= (3 << 0);
3223                 break;
3224         }
3225         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3226         case 0: /* four banks */
3227                 rdev->config.si.tile_config |= 0 << 4;
3228                 break;
3229         case 1: /* eight banks */
3230                 rdev->config.si.tile_config |= 1 << 4;
3231                 break;
3232         case 2: /* sixteen banks */
3233         default:
3234                 rdev->config.si.tile_config |= 2 << 4;
3235                 break;
3236         }
3237         rdev->config.si.tile_config |=
3238                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3239         rdev->config.si.tile_config |=
3240                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3241
3242         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3243         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3244         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3245         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3246         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3247         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3248         if (rdev->has_uvd) {
3249                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3250                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3251                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3252         }
3253
3254         si_tiling_mode_table_init(rdev);
3255
3256         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3257                     rdev->config.si.max_sh_per_se,
3258                     rdev->config.si.max_backends_per_se);
3259
3260         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3261                      rdev->config.si.max_sh_per_se,
3262                      rdev->config.si.max_cu_per_sh);
3263
3264         rdev->config.si.active_cus = 0;
3265         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3266                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3267                         rdev->config.si.active_cus +=
3268                                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3269                 }
3270         }
3271
3272         /* set HW defaults for 3D engine */
3273         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3274                                      ROQ_IB2_START(0x2b)));
3275         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3276
3277         sx_debug_1 = RREG32(SX_DEBUG_1);
3278         WREG32(SX_DEBUG_1, sx_debug_1);
3279
3280         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3281
3282         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3283                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3284                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3285                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3286
3287         WREG32(VGT_NUM_INSTANCES, 1);
3288
3289         WREG32(CP_PERFMON_CNTL, 0);
3290
3291         WREG32(SQ_CONFIG, 0);
3292
3293         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3294                                           FORCE_EOV_MAX_REZ_CNT(255)));
3295
3296         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3297                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3298
3299         WREG32(VGT_GS_VERTEX_REUSE, 16);
3300         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3301
3302         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3303         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3304         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3305         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3306         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3307         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3308         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3309         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3310
3311         tmp = RREG32(HDP_MISC_CNTL);
3312         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3313         WREG32(HDP_MISC_CNTL, tmp);
3314
3315         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3316         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3317
3318         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3319
3320         udelay(50);
3321 }
3322
3323 /*
3324  * GPU scratch register helper functions.
3325  */
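/**
 * si_scratch_init - set up the CP scratch registers
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the bookkeeping for the seven scratch registers starting
 * at SCRATCH_REG0.
 */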
3326 static void si_scratch_init(struct radeon_device *rdev)
3327 {
3328         int i;
3329
3330         rdev->scratch.num_reg = 7;
3331         rdev->scratch.reg_base = SCRATCH_REG0;
3332         for (i = 0; i < rdev->scratch.num_reg; i++) {
3333                 rdev->scratch.free[i] = true;
3334                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3335         }
3336 }
3337
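/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the GPU caches over the GART and emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number and raises an interrupt.
 */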
3338 void si_fence_ring_emit(struct radeon_device *rdev,
3339                         struct radeon_fence *fence)
3340 {
3341         struct radeon_ring *ring = &rdev->ring[fence->ring];
3342         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3343
3344         /* flush read cache over gart */
3345         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3346         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3347         radeon_ring_write(ring, 0);
3348         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3349         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3350                           PACKET3_TC_ACTION_ENA |
3351                           PACKET3_SH_KCACHE_ACTION_ENA |
3352                           PACKET3_SH_ICACHE_ACTION_ENA);
3353         radeon_ring_write(ring, 0xFFFFFFFF);
3354         radeon_ring_write(ring, 0);
3355         radeon_ring_write(ring, 10); /* poll interval */
3356         /* EVENT_WRITE_EOP - flush caches, send int */
3357         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3358         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3359         radeon_ring_write(ring, lower_32_bits(addr));
3360         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3361         radeon_ring_write(ring, fence->seq);
3362         radeon_ring_write(ring, 0);
3363 }
3364
3365 /*
3366  * IB stuff
3367  */
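/**
 * si_ring_ib_execute - emit an IB (Indirect Buffer) on the ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedules an IB on the gfx ring.  Const IBs are preceded by a switch
 * buffer packet; normal IBs update the saved read pointer and are
 * followed by a cache flush for the IB's VM id.
 */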
3368 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3369 {
3370         struct radeon_ring *ring = &rdev->ring[ib->ring];
3371         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3372         u32 header;
3373
3374         if (ib->is_const_ib) {
3375                 /* set switch buffer packet before const IB */
3376                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3377                 radeon_ring_write(ring, 0);
3378
3379                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3380         } else {
3381                 u32 next_rptr;
3382                 if (ring->rptr_save_reg) {
3383                         next_rptr = ring->wptr + 3 + 4 + 8;
3384                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3385                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3386                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3387                         radeon_ring_write(ring, next_rptr);
3388                 } else if (rdev->wb.enabled) {
3389                         next_rptr = ring->wptr + 5 + 4 + 8;
3390                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3391                         radeon_ring_write(ring, (1 << 8));
3392                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3393                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3394                         radeon_ring_write(ring, next_rptr);
3395                 }
3396
3397                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3398         }
3399
3400         radeon_ring_write(ring, header);
3401         radeon_ring_write(ring,
3402 #ifdef __BIG_ENDIAN
3403                           (2 << 0) |
3404 #endif
3405                           (ib->gpu_addr & 0xFFFFFFFC));
3406         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3407         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3408
3409         if (!ib->is_const_ib) {
3410                 /* flush read cache over gart for this vmid */
3411                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3412                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3413                 radeon_ring_write(ring, vm_id);
3414                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3415                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3416                                   PACKET3_TC_ACTION_ENA |
3417                                   PACKET3_SH_KCACHE_ACTION_ENA |
3418                                   PACKET3_SH_ICACHE_ACTION_ENA);
3419                 radeon_ring_write(ring, 0xFFFFFFFF);
3420                 radeon_ring_write(ring, 0);
3421                 radeon_ring_write(ring, 10); /* poll interval */
3422         }
3423 }
3424
3425 /*
3426  * CP.
3427  */
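/**
 * si_cp_enable - enable/disable the CP micro engines
 *
 * @rdev: radeon_device pointer
 * @enable: true to run the ME/PFP/CE, false to halt them
 *
 * When disabling, also clears the scratch write mask and marks the gfx
 * and compute rings as not ready.
 */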
3428 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3429 {
3430         if (enable)
3431                 WREG32(CP_ME_CNTL, 0);
3432         else {
3433                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3434                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3435                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3436                 WREG32(SCRATCH_UMSK, 0);
3437                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3438                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3439                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3440         }
3441         udelay(50);
3442 }
3443
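/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and loads the PFP, CE and ME microcode images into the
 * CP internal RAM, handling both the new header-based and the legacy
 * firmware layouts.  Returns 0 on success, -EINVAL if firmware is missing.
 */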
3444 static int si_cp_load_microcode(struct radeon_device *rdev)
3445 {
3446         int i;
3447
3448         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3449                 return -EINVAL;
3450
3451         si_cp_enable(rdev, false);
3452
3453         if (rdev->new_fw) {
3454                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3455                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3456                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3457                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3458                 const struct gfx_firmware_header_v1_0 *me_hdr =
3459                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3460                 const __le32 *fw_data;
3461                 u32 fw_size;
3462
3463                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3464                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3465                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3466
3467                 /* PFP */
3468                 fw_data = (const __le32 *)
3469                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3470                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3471                 WREG32(CP_PFP_UCODE_ADDR, 0);
3472                 for (i = 0; i < fw_size; i++)
3473                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3474                 WREG32(CP_PFP_UCODE_ADDR, 0);
3475
3476                 /* CE */
3477                 fw_data = (const __le32 *)
3478                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3479                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3480                 WREG32(CP_CE_UCODE_ADDR, 0);
3481                 for (i = 0; i < fw_size; i++)
3482                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3483                 WREG32(CP_CE_UCODE_ADDR, 0);
3484
3485                 /* ME */
3486                 fw_data = (const __le32 *)
3487                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3488                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3489                 WREG32(CP_ME_RAM_WADDR, 0);
3490                 for (i = 0; i < fw_size; i++)
3491                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3492                 WREG32(CP_ME_RAM_WADDR, 0);
3493         } else {
3494                 const __be32 *fw_data;
3495
3496                 /* PFP */
3497                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3498                 WREG32(CP_PFP_UCODE_ADDR, 0);
3499                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3500                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3501                 WREG32(CP_PFP_UCODE_ADDR, 0);
3502
3503                 /* CE */
3504                 fw_data = (const __be32 *)rdev->ce_fw->data;
3505                 WREG32(CP_CE_UCODE_ADDR, 0);
3506                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3507                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3508                 WREG32(CP_CE_UCODE_ADDR, 0);
3509
3510                 /* ME */
3511                 fw_data = (const __be32 *)rdev->me_fw->data;
3512                 WREG32(CP_ME_RAM_WADDR, 0);
3513                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3514                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3515                 WREG32(CP_ME_RAM_WADDR, 0);
3516         }
3517
3518         WREG32(CP_PFP_UCODE_ADDR, 0);
3519         WREG32(CP_CE_UCODE_ADDR, 0);
3520         WREG32(CP_ME_RAM_WADDR, 0);
3521         WREG32(CP_ME_RAM_RADDR, 0);
3522         return 0;
3523 }
3524
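/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Emit the ME_INITIALIZE packet and the CE partition setup, enable the
 * CP, emit the default and clear-state context on the gfx ring, then
 * clear the compute context state on each ring (SI).
 * Returns 0 on success, error on failure.
 */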
3525 static int si_cp_start(struct radeon_device *rdev)
3526 {
3527         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3528         int r, i;
3529
3530         r = radeon_ring_lock(rdev, ring, 7 + 4);
3531         if (r) {
3532                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3533                 return r;
3534         }
3535         /* init the CP */
3536         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3537         radeon_ring_write(ring, 0x1);
3538         radeon_ring_write(ring, 0x0);
3539         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3540         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3541         radeon_ring_write(ring, 0);
3542         radeon_ring_write(ring, 0);
3543
3544         /* init the CE partitions */
3545         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3546         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3547         radeon_ring_write(ring, 0xc000);
3548         radeon_ring_write(ring, 0xe000);
3549         radeon_ring_unlock_commit(rdev, ring, false);
3550
3551         si_cp_enable(rdev, true);
3552
3553         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3554         if (r) {
3555                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3556                 return r;
3557         }
3558
3559         /* setup clear context state */
3560         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3561         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3562
3563         for (i = 0; i < si_default_size; i++)
3564                 radeon_ring_write(ring, si_default_state[i]);
3565
3566         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3567         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3568
3569         /* set clear context state */
3570         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3571         radeon_ring_write(ring, 0);
3572
3573         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3574         radeon_ring_write(ring, 0x00000316);
3575         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3576         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3577
3578         radeon_ring_unlock_commit(rdev, ring, false);
3579
3580         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3581                 ring = &rdev->ring[i];
3582                 r = radeon_ring_lock(rdev, ring, 2);
                     if (r) {
                             DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                             return r;
                     }
3583
3584                 /* clear the compute context state */
3585                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3586                 radeon_ring_write(ring, 0);
3587
3588                 radeon_ring_unlock_commit(rdev, ring, false);
3589         }
3590
3591         return 0;
3592 }
3593
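/**
 * si_cp_fini - tear down the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Disable the CP and tear down the gfx ring and both compute rings,
 * freeing their rptr save scratch registers (SI).
 */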
3594 static void si_cp_fini(struct radeon_device *rdev)
3595 {
3596         struct radeon_ring *ring;
3597         si_cp_enable(rdev, false);
3598
3599         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3600         radeon_ring_fini(rdev, ring);
3601         radeon_scratch_free(rdev, ring->rptr_save_reg);
3602
3603         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3604         radeon_ring_fini(rdev, ring);
3605         radeon_scratch_free(rdev, ring->rptr_save_reg);
3606
3607         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3608         radeon_ring_fini(rdev, ring);
3609         radeon_scratch_free(rdev, ring->rptr_save_reg);
3610 }
3611
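/**
 * si_cp_resume - set up and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Program the ring buffer size, read/write pointers and writeback
 * addresses for the gfx ring and both compute rings, start the CP and
 * run a ring test on each ring (SI).
 * Returns 0 on success, error on failure.
 */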
3612 static int si_cp_resume(struct radeon_device *rdev)
3613 {
3614         struct radeon_ring *ring;
3615         u32 tmp;
3616         u32 rb_bufsz;
3617         int r;
3618
3619         si_enable_gui_idle_interrupt(rdev, false);
3620
3621         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3622         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3623
3624         /* Set the write pointer delay */
3625         WREG32(CP_RB_WPTR_DELAY, 0);
3626
3627         WREG32(CP_DEBUG, 0);
3628         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3629
3630         /* ring 0 - compute and gfx */
3631         /* Set ring buffer size */
3632         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3633         rb_bufsz = order_base_2(ring->ring_size / 8);
3634         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3635 #ifdef __BIG_ENDIAN
3636         tmp |= BUF_SWAP_32BIT;
3637 #endif
3638         WREG32(CP_RB0_CNTL, tmp);
3639
3640         /* Initialize the ring buffer's read and write pointers */
3641         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3642         ring->wptr = 0;
3643         WREG32(CP_RB0_WPTR, ring->wptr);
3644
3645         /* set the wb address whether it's enabled or not */
3646         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3647         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3648
3649         if (rdev->wb.enabled)
3650                 WREG32(SCRATCH_UMSK, 0xff);
3651         else {
3652                 tmp |= RB_NO_UPDATE;
3653                 WREG32(SCRATCH_UMSK, 0);
3654         }
3655
3656         mdelay(1);
3657         WREG32(CP_RB0_CNTL, tmp);
3658
3659         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3660
3661         /* ring1  - compute only */
3662         /* Set ring buffer size */
3663         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3664         rb_bufsz = order_base_2(ring->ring_size / 8);
3665         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3666 #ifdef __BIG_ENDIAN
3667         tmp |= BUF_SWAP_32BIT;
3668 #endif
3669         WREG32(CP_RB1_CNTL, tmp);
3670
3671         /* Initialize the ring buffer's read and write pointers */
3672         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3673         ring->wptr = 0;
3674         WREG32(CP_RB1_WPTR, ring->wptr);
3675
3676         /* set the wb address whether it's enabled or not */
3677         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3678         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3679
3680         mdelay(1);
3681         WREG32(CP_RB1_CNTL, tmp);
3682
3683         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3684
3685         /* ring2 - compute only */
3686         /* Set ring buffer size */
3687         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3688         rb_bufsz = order_base_2(ring->ring_size / 8);
3689         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3690 #ifdef __BIG_ENDIAN
3691         tmp |= BUF_SWAP_32BIT;
3692 #endif
3693         WREG32(CP_RB2_CNTL, tmp);
3694
3695         /* Initialize the ring buffer's read and write pointers */
3696         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3697         ring->wptr = 0;
3698         WREG32(CP_RB2_WPTR, ring->wptr);
3699
3700         /* set the wb address whether it's enabled or not */
3701         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3702         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3703
3704         mdelay(1);
3705         WREG32(CP_RB2_CNTL, tmp);
3706
3707         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3708
3709         /* start the rings */
3710         si_cp_start(rdev);
3711         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3712         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3713         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3714         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3715         if (r) {
3716                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3717                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3718                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3719                 return r;
3720         }
3721         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3722         if (r) {
3723                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3724         }
3725         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3726         if (r) {
3727                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3728         }
3729
3730         si_enable_gui_idle_interrupt(rdev, true);
3731
3732         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3733                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3734
3735         return 0;
3736 }
3737
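/**
 * si_gpu_check_soft_reset - determine which blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Read the GRBM, SRBM, DMA and VM status registers and build a mask of
 * RADEON_RESET_* flags for the blocks that appear busy or hung.  A busy
 * MC is assumed not to be hung and is cleared from the mask (SI).
 * Returns the reset mask.
 */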
3738 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3739 {
3740         u32 reset_mask = 0;
3741         u32 tmp;
3742
3743         /* GRBM_STATUS */
3744         tmp = RREG32(GRBM_STATUS);
3745         if (tmp & (PA_BUSY | SC_BUSY |
3746                    BCI_BUSY | SX_BUSY |
3747                    TA_BUSY | VGT_BUSY |
3748                    DB_BUSY | CB_BUSY |
3749                    GDS_BUSY | SPI_BUSY |
3750                    IA_BUSY | IA_BUSY_NO_DMA))
3751                 reset_mask |= RADEON_RESET_GFX;
3752
3753         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3754                    CP_BUSY | CP_COHERENCY_BUSY))
3755                 reset_mask |= RADEON_RESET_CP;
3756
3757         if (tmp & GRBM_EE_BUSY)
3758                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3759
3760         /* GRBM_STATUS2 */
3761         tmp = RREG32(GRBM_STATUS2);
3762         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3763                 reset_mask |= RADEON_RESET_RLC;
3764
3765         /* DMA_STATUS_REG 0 */
3766         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3767         if (!(tmp & DMA_IDLE))
3768                 reset_mask |= RADEON_RESET_DMA;
3769
3770         /* DMA_STATUS_REG 1 */
3771         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3772         if (!(tmp & DMA_IDLE))
3773                 reset_mask |= RADEON_RESET_DMA1;
3774
3775         /* SRBM_STATUS2 */
3776         tmp = RREG32(SRBM_STATUS2);
3777         if (tmp & DMA_BUSY)
3778                 reset_mask |= RADEON_RESET_DMA;
3779
3780         if (tmp & DMA1_BUSY)
3781                 reset_mask |= RADEON_RESET_DMA1;
3782
3783         /* SRBM_STATUS */
3784         tmp = RREG32(SRBM_STATUS);
3785
3786         if (tmp & IH_BUSY)
3787                 reset_mask |= RADEON_RESET_IH;
3788
3789         if (tmp & SEM_BUSY)
3790                 reset_mask |= RADEON_RESET_SEM;
3791
3792         if (tmp & GRBM_RQ_PENDING)
3793                 reset_mask |= RADEON_RESET_GRBM;
3794
3795         if (tmp & VMC_BUSY)
3796                 reset_mask |= RADEON_RESET_VMC;
3797
3798         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3799                    MCC_BUSY | MCD_BUSY))
3800                 reset_mask |= RADEON_RESET_MC;
3801
3802         if (evergreen_is_display_hung(rdev))
3803                 reset_mask |= RADEON_RESET_DISPLAY;
3804
3805         /* VM_L2_STATUS */
3806         tmp = RREG32(VM_L2_STATUS);
3807         if (tmp & L2_BUSY)
3808                 reset_mask |= RADEON_RESET_VMC;
3809
3810         /* Skip MC reset as it's most likely not hung, just busy */
3811         if (reset_mask & RADEON_RESET_MC) {
3812                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3813                 reset_mask &= ~RADEON_RESET_MC;
3814         }
3815
3816         return reset_mask;
3817 }
3818
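/**
 * si_gpu_soft_reset - soft reset the requested blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags for the blocks to reset
 *
 * Stop the CP, RLC and DMA engines, save the MC state, then pulse the
 * GRBM and SRBM soft reset bits corresponding to the requested blocks
 * before restoring the MC state (SI).
 */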
3819 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3820 {
3821         struct evergreen_mc_save save;
3822         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3823         u32 tmp;
3824
3825         if (reset_mask == 0)
3826                 return;
3827
3828         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3829
3830         evergreen_print_gpu_status_regs(rdev);
3831         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3832                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3833         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3834                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3835
3836         /* disable PG/CG */
3837         si_fini_pg(rdev);
3838         si_fini_cg(rdev);
3839
3840         /* stop the rlc */
3841         si_rlc_stop(rdev);
3842
3843         /* Disable CP parsing/prefetching */
3844         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3845
3846         if (reset_mask & RADEON_RESET_DMA) {
3847                 /* dma0 */
3848                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3849                 tmp &= ~DMA_RB_ENABLE;
3850                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3851         }
3852         if (reset_mask & RADEON_RESET_DMA1) {
3853                 /* dma1 */
3854                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3855                 tmp &= ~DMA_RB_ENABLE;
3856                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3857         }
3858
3859         udelay(50);
3860
3861         evergreen_mc_stop(rdev, &save);
3862         if (evergreen_mc_wait_for_idle(rdev)) {
3863                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3864         }
3865
3866         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3867                 grbm_soft_reset = SOFT_RESET_CB |
3868                         SOFT_RESET_DB |
3869                         SOFT_RESET_GDS |
3870                         SOFT_RESET_PA |
3871                         SOFT_RESET_SC |
3872                         SOFT_RESET_BCI |
3873                         SOFT_RESET_SPI |
3874                         SOFT_RESET_SX |
3875                         SOFT_RESET_TC |
3876                         SOFT_RESET_TA |
3877                         SOFT_RESET_VGT |
3878                         SOFT_RESET_IA;
3879         }
3880
3881         if (reset_mask & RADEON_RESET_CP) {
3882                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3883
3884                 srbm_soft_reset |= SOFT_RESET_GRBM;
3885         }
3886
3887         if (reset_mask & RADEON_RESET_DMA)
3888                 srbm_soft_reset |= SOFT_RESET_DMA;
3889
3890         if (reset_mask & RADEON_RESET_DMA1)
3891                 srbm_soft_reset |= SOFT_RESET_DMA1;
3892
3893         if (reset_mask & RADEON_RESET_DISPLAY)
3894                 srbm_soft_reset |= SOFT_RESET_DC;
3895
3896         if (reset_mask & RADEON_RESET_RLC)
3897                 grbm_soft_reset |= SOFT_RESET_RLC;
3898
3899         if (reset_mask & RADEON_RESET_SEM)
3900                 srbm_soft_reset |= SOFT_RESET_SEM;
3901
3902         if (reset_mask & RADEON_RESET_IH)
3903                 srbm_soft_reset |= SOFT_RESET_IH;
3904
3905         if (reset_mask & RADEON_RESET_GRBM)
3906                 srbm_soft_reset |= SOFT_RESET_GRBM;
3907
3908         if (reset_mask & RADEON_RESET_VMC)
3909                 srbm_soft_reset |= SOFT_RESET_VMC;
3910
3911         if (reset_mask & RADEON_RESET_MC)
3912                 srbm_soft_reset |= SOFT_RESET_MC;
3913
3914         if (grbm_soft_reset) {
3915                 tmp = RREG32(GRBM_SOFT_RESET);
3916                 tmp |= grbm_soft_reset;
3917                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3918                 WREG32(GRBM_SOFT_RESET, tmp);
3919                 tmp = RREG32(GRBM_SOFT_RESET);
3920
3921                 udelay(50);
3922
3923                 tmp &= ~grbm_soft_reset;
3924                 WREG32(GRBM_SOFT_RESET, tmp);
3925                 tmp = RREG32(GRBM_SOFT_RESET);
3926         }
3927
3928         if (srbm_soft_reset) {
3929                 tmp = RREG32(SRBM_SOFT_RESET);
3930                 tmp |= srbm_soft_reset;
3931                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3932                 WREG32(SRBM_SOFT_RESET, tmp);
3933                 tmp = RREG32(SRBM_SOFT_RESET);
3934
3935                 udelay(50);
3936
3937                 tmp &= ~srbm_soft_reset;
3938                 WREG32(SRBM_SOFT_RESET, tmp);
3939                 tmp = RREG32(SRBM_SOFT_RESET);
3940         }
3941
3942         /* Wait a little for things to settle down */
3943         udelay(50);
3944
3945         evergreen_mc_resume(rdev, &save);
3946         udelay(50);
3947
3948         evergreen_print_gpu_status_regs(rdev);
3949 }
3950
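/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass
 *
 * @rdev: radeon_device pointer
 *
 * Put the SPLL into bypass, wait for the clock switch to take effect
 * and deselect the MPLL as the memory clock source, in preparation for
 * a pci config reset (SI).
 */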
3951 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3952 {
3953         u32 tmp, i;
3954
3955         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3956         tmp |= SPLL_BYPASS_EN;
3957         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3958
3959         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3960         tmp |= SPLL_CTLREQ_CHG;
3961         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3962
3963         for (i = 0; i < rdev->usec_timeout; i++) {
3964                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3965                         break;
3966                 udelay(1);
3967         }
3968
3969         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3970         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3971         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3972
3973         tmp = RREG32(MPLL_CNTL_MODE);
3974         tmp &= ~MPLL_MCLK_SEL;
3975         WREG32(MPLL_CNTL_MODE, tmp);
3976 }
3977
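/**
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Take software control of the SPLL, put it into reset and sleep, then
 * hand control back to the hardware (SI).
 */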
3978 static void si_spll_powerdown(struct radeon_device *rdev)
3979 {
3980         u32 tmp;
3981
3982         tmp = RREG32(SPLL_CNTL_MODE);
3983         tmp |= SPLL_SW_DIR_CONTROL;
3984         WREG32(SPLL_CNTL_MODE, tmp);
3985
3986         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3987         tmp |= SPLL_RESET;
3988         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3989
3990         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3991         tmp |= SPLL_SLEEP;
3992         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3993
3994         tmp = RREG32(SPLL_CNTL_MODE);
3995         tmp &= ~SPLL_SW_DIR_CONTROL;
3996         WREG32(SPLL_CNTL_MODE, tmp);
3997 }
3998
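/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the CP, DMA and RLC engines, stop MC access, switch the
 * clocks to bypass, power down the SPLL and disable bus mastering,
 * then reset the asic through the pci config space and wait for it
 * to come back (SI).
 */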
3999 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4000 {
4001         struct evergreen_mc_save save;
4002         u32 tmp, i;
4003
4004         dev_info(rdev->dev, "GPU pci config reset\n");
4005
4006         /* disable dpm? */
4007
4008         /* disable cg/pg */
4009         si_fini_pg(rdev);
4010         si_fini_cg(rdev);
4011
4012         /* Disable CP parsing/prefetching */
4013         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4014         /* dma0 */
4015         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4016         tmp &= ~DMA_RB_ENABLE;
4017         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4018         /* dma1 */
4019         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4020         tmp &= ~DMA_RB_ENABLE;
4021         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4022         /* XXX other engines? */
4023
4024         /* halt the rlc, disable cp internal ints */
4025         si_rlc_stop(rdev);
4026
4027         udelay(50);
4028
4029         /* disable mem access */
4030         evergreen_mc_stop(rdev, &save);
4031         if (evergreen_mc_wait_for_idle(rdev)) {
4032                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4033         }
4034
4035         /* set mclk/sclk to bypass */
4036         si_set_clk_bypass_mode(rdev);
4037         /* powerdown spll */
4038         si_spll_powerdown(rdev);
4039         /* disable BM */
4040         pci_clear_master(rdev->pdev);
4041         /* reset */
4042         radeon_pci_config_reset(rdev);
4043         /* wait for asic to come out of reset */
4044         for (i = 0; i < rdev->usec_timeout; i++) {
4045                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4046                         break;
4047                 udelay(1);
4048         }
4049 }
4050
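/**
 * si_asic_reset - attempt to reset a hung asic
 *
 * @rdev: radeon_device pointer
 *
 * Try a soft reset of the blocks reported as hung; if something is
 * still hung afterwards and radeon_hard_reset is set, fall back to a
 * pci config reset (SI).
 * Returns 0.
 */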
4051 int si_asic_reset(struct radeon_device *rdev)
4052 {
4053         u32 reset_mask;
4054
4055         reset_mask = si_gpu_check_soft_reset(rdev);
4056
4057         if (reset_mask)
4058                 r600_set_bios_scratch_engine_hung(rdev, true);
4059
4060         /* try soft reset */
4061         si_gpu_soft_reset(rdev, reset_mask);
4062
4063         reset_mask = si_gpu_check_soft_reset(rdev);
4064
4065         /* try pci config reset */
4066         if (reset_mask && radeon_hard_reset)
4067                 si_gpu_pci_config_reset(rdev);
4068
4069         reset_mask = si_gpu_check_soft_reset(rdev);
4070
4071         if (!reset_mask)
4072                 r600_set_bios_scratch_engine_hung(rdev, false);
4073
4074         return 0;
4075 }
4076
4077 /**
4078  * si_gfx_is_lockup - Check if the GFX engine is locked up
4079  *
4080  * @rdev: radeon_device pointer
4081  * @ring: radeon_ring structure holding ring information
4082  *
4083  * Check if the GFX engine is locked up.
4084  * Returns true if the engine appears to be locked up, false if not.
4085  */
4086 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4087 {
4088         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4089
4090         if (!(reset_mask & (RADEON_RESET_GFX |
4091                             RADEON_RESET_COMPUTE |
4092                             RADEON_RESET_CP))) {
4093                 radeon_ring_lockup_update(rdev, ring);
4094                 return false;
4095         }
4096         return radeon_ring_test_lockup(rdev, ring);
4097 }
4098
4099 /* MC */
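/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the HDP registers, stop the MC clients, program the
 * system aperture, FB location and AGP registers, then resume the MC
 * and disable the VGA renderer (SI).
 */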
4100 static void si_mc_program(struct radeon_device *rdev)
4101 {
4102         struct evergreen_mc_save save;
4103         u32 tmp;
4104         int i, j;
4105
4106         /* Initialize HDP */
4107         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4108                 WREG32((0x2c14 + j), 0x00000000);
4109                 WREG32((0x2c18 + j), 0x00000000);
4110                 WREG32((0x2c1c + j), 0x00000000);
4111                 WREG32((0x2c20 + j), 0x00000000);
4112                 WREG32((0x2c24 + j), 0x00000000);
4113         }
4114         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4115
4116         evergreen_mc_stop(rdev, &save);
4117         if (radeon_mc_wait_for_idle(rdev)) {
4118                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4119         }
4120         if (!ASIC_IS_NODCE(rdev))
4121                 /* Lockout access through VGA aperture */
4122                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4123         /* Update configuration */
4124         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4125                rdev->mc.vram_start >> 12);
4126         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4127                rdev->mc.vram_end >> 12);
4128         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4129                rdev->vram_scratch.gpu_addr >> 12);
4130         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4131         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4132         WREG32(MC_VM_FB_LOCATION, tmp);
4133         /* XXX double check these! */
4134         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4135         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4136         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4137         WREG32(MC_VM_AGP_BASE, 0);
4138         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4139         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4140         if (radeon_mc_wait_for_idle(rdev)) {
4141                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4142         }
4143         evergreen_mc_resume(rdev, &save);
4144         if (!ASIC_IS_NODCE(rdev)) {
4145                 /* we need to own VRAM, so turn off the VGA renderer here
4146                  * to stop it overwriting our objects */
4147                 rv515_vga_render_disable(rdev);
4148         }
4149 }
4150
4151 void si_vram_gtt_location(struct radeon_device *rdev,
4152                           struct radeon_mc *mc)
4153 {
4154         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4155                 /* leave room for at least 1024M GTT */
4156                 dev_warn(rdev->dev, "limiting VRAM\n");
4157                 mc->real_vram_size = 0xFFC0000000ULL;
4158                 mc->mc_vram_size = 0xFFC0000000ULL;
4159         }
4160         radeon_vram_location(rdev, &rdev->mc, 0);
4161         rdev->mc.gtt_base_align = 0;
4162         radeon_gtt_location(rdev, mc);
4163 }
4164
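/**
 * si_mc_init - initialize the memory controller driver state
 *
 * @rdev: radeon_device pointer
 *
 * Determine the VRAM channel width and channel count, read the VRAM
 * size from CONFIG_MEMSIZE and set up the VRAM and GTT locations (SI).
 * Returns 0.
 */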
4165 static int si_mc_init(struct radeon_device *rdev)
4166 {
4167         u32 tmp;
4168         int chansize, numchan;
4169
4170         /* Get VRAM information */
4171         rdev->mc.vram_is_ddr = true;
4172         tmp = RREG32(MC_ARB_RAMCFG);
4173         if (tmp & CHANSIZE_OVERRIDE) {
4174                 chansize = 16;
4175         } else if (tmp & CHANSIZE_MASK) {
4176                 chansize = 64;
4177         } else {
4178                 chansize = 32;
4179         }
4180         tmp = RREG32(MC_SHARED_CHMAP);
4181         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4182         case 0:
4183         default:
4184                 numchan = 1;
4185                 break;
4186         case 1:
4187                 numchan = 2;
4188                 break;
4189         case 2:
4190                 numchan = 4;
4191                 break;
4192         case 3:
4193                 numchan = 8;
4194                 break;
4195         case 4:
4196                 numchan = 3;
4197                 break;
4198         case 5:
4199                 numchan = 6;
4200                 break;
4201         case 6:
4202                 numchan = 10;
4203                 break;
4204         case 7:
4205                 numchan = 12;
4206                 break;
4207         case 8:
4208                 numchan = 16;
4209                 break;
4210         }
4211         rdev->mc.vram_width = numchan * chansize;
4212         /* Could aper size report 0? */
4213         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4214         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4215         /* size in MB on si */
4216         tmp = RREG32(CONFIG_MEMSIZE);
4217         /* some boards may have garbage in the upper 16 bits */
4218         if (tmp & 0xffff0000) {
4219                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4220                 if (tmp & 0xffff)
4221                         tmp &= 0xffff;
4222         }
4223         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4224         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4225         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4226         si_vram_gtt_location(rdev, &rdev->mc);
4227         radeon_update_bandwidth_info(rdev);
4228
4229         return 0;
4230 }
4231
4232 /*
4233  * GART
4234  */
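/**
 * si_pcie_gart_tlb_flush - flush the GART TLB via MMIO
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache and invalidate the page table cache for VM
 * context 0 (SI).
 */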
4235 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4236 {
4237         /* flush hdp cache */
4238         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4239
4240         /* bits 0-15 are the VM contexts0-15 */
4241         WREG32(VM_INVALIDATE_REQUEST, 1);
4242 }
4243
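/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART table in VRAM, program the L1 TLB and L2 cache control
 * registers, point VM context 0 at the GTT aperture, enable contexts
 * 1-15 for the VMs and flush the TLB (SI).
 * Returns 0 on success, error on failure.
 */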
4244 static int si_pcie_gart_enable(struct radeon_device *rdev)
4245 {
4246         int r, i;
4247
4248         if (rdev->gart.robj == NULL) {
4249                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4250                 return -EINVAL;
4251         }
4252         r = radeon_gart_table_vram_pin(rdev);
4253         if (r)
4254                 return r;
4255         /* Setup TLB control */
4256         WREG32(MC_VM_MX_L1_TLB_CNTL,
4257                (0xA << 7) |
4258                ENABLE_L1_TLB |
4259                ENABLE_L1_FRAGMENT_PROCESSING |
4260                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4261                ENABLE_ADVANCED_DRIVER_MODEL |
4262                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4263         /* Setup L2 cache */
4264         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4265                ENABLE_L2_FRAGMENT_PROCESSING |
4266                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4267                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4268                EFFECTIVE_L2_QUEUE_SIZE(7) |
4269                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4270         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4271         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4272                BANK_SELECT(4) |
4273                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4274         /* setup context0 */
4275         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4276         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4277         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4278         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4279                         (u32)(rdev->dummy_page.addr >> 12));
4280         WREG32(VM_CONTEXT0_CNTL2, 0);
4281         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4282                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4283
4284         WREG32(0x15D4, 0);
4285         WREG32(0x15D8, 0);
4286         WREG32(0x15DC, 0);
4287
4288         /* empty context1-15 */
4289         /* set vm size, must be a multiple of 4 */
4290         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4291         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4292         /* Assign the pt base to something valid for now; the pts used for
4293          * the VMs are determined by the application and setup and assigned
4294          * on the fly in the vm part of radeon_gart.c
4295          */
4296         for (i = 1; i < 16; i++) {
4297                 if (i < 8)
4298                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4299                                rdev->vm_manager.saved_table_addr[i]);
4300                 else
4301                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4302                                rdev->vm_manager.saved_table_addr[i]);
4303         }
4304
4305         /* enable context1-15 */
4306         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4307                (u32)(rdev->dummy_page.addr >> 12));
4308         WREG32(VM_CONTEXT1_CNTL2, 4);
4309         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4310                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4311                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4312                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4313                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4314                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4315                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4316                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4317                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4318                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4319                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4320                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4321                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4322                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4323
4324         si_pcie_gart_tlb_flush(rdev);
4325         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4326                  (unsigned)(rdev->mc.gtt_size >> 20),
4327                  (unsigned long long)rdev->gart.table_addr);
4328         rdev->gart.ready = true;
4329         return 0;
4330 }
4331
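/**
 * si_pcie_gart_disable - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Save the per-VM page table base addresses, disable all VM contexts
 * and the L1/L2 translation, and unpin the GART table (SI).
 */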
4332 static void si_pcie_gart_disable(struct radeon_device *rdev)
4333 {
4334         unsigned i;
4335
4336         for (i = 1; i < 16; ++i) {
4337                 uint32_t reg;
4338                 if (i < 8)
4339                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4340                 else
4341                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4342                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4343         }
4344
4345         /* Disable all tables */
4346         WREG32(VM_CONTEXT0_CNTL, 0);
4347         WREG32(VM_CONTEXT1_CNTL, 0);
4348         /* Setup TLB control */
4349         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4350                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4351         /* Setup L2 cache */
4352         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4353                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4354                EFFECTIVE_L2_QUEUE_SIZE(7) |
4355                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4356         WREG32(VM_L2_CNTL2, 0);
4357         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4358                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4359         radeon_gart_table_vram_unpin(rdev);
4360 }
4361
4362 static void si_pcie_gart_fini(struct radeon_device *rdev)
4363 {
4364         si_pcie_gart_disable(rdev);
4365         radeon_gart_table_vram_free(rdev);
4366         radeon_gart_fini(rdev);
4367 }
4368
4369 /* vm parser */
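/**
 * si_vm_reg_valid - check whether an IB may write a register
 *
 * @reg: register offset
 *
 * Context registers are always allowed; config registers are checked
 * against a whitelist (SI).
 * Returns true if the register is allowed, false otherwise.
 */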
4370 static bool si_vm_reg_valid(u32 reg)
4371 {
4372         /* context regs are fine */
4373         if (reg >= 0x28000)
4374                 return true;
4375
4376         /* check config regs */
4377         switch (reg) {
4378         case GRBM_GFX_INDEX:
4379         case CP_STRMOUT_CNTL:
4380         case VGT_VTX_VECT_EJECT_REG:
4381         case VGT_CACHE_INVALIDATION:
4382         case VGT_ESGS_RING_SIZE:
4383         case VGT_GSVS_RING_SIZE:
4384         case VGT_GS_VERTEX_REUSE:
4385         case VGT_PRIMITIVE_TYPE:
4386         case VGT_INDEX_TYPE:
4387         case VGT_NUM_INDICES:
4388         case VGT_NUM_INSTANCES:
4389         case VGT_TF_RING_SIZE:
4390         case VGT_HS_OFFCHIP_PARAM:
4391         case VGT_TF_MEMORY_BASE:
4392         case PA_CL_ENHANCE:
4393         case PA_SU_LINE_STIPPLE_VALUE:
4394         case PA_SC_LINE_STIPPLE_STATE:
4395         case PA_SC_ENHANCE:
4396         case SQC_CACHES:
4397         case SPI_STATIC_THREAD_MGMT_1:
4398         case SPI_STATIC_THREAD_MGMT_2:
4399         case SPI_STATIC_THREAD_MGMT_3:
4400         case SPI_PS_MAX_WAVE_ID:
4401         case SPI_CONFIG_CNTL:
4402         case SPI_CONFIG_CNTL_1:
4403         case TA_CNTL_AUX:
4404                 return true;
4405         default:
4406                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4407                 return false;
4408         }
4409 }
4410
4411 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4412                                   u32 *ib, struct radeon_cs_packet *pkt)
4413 {
4414         switch (pkt->opcode) {
4415         case PACKET3_NOP:
4416         case PACKET3_SET_BASE:
4417         case PACKET3_SET_CE_DE_COUNTERS:
4418         case PACKET3_LOAD_CONST_RAM:
4419         case PACKET3_WRITE_CONST_RAM:
4420         case PACKET3_WRITE_CONST_RAM_OFFSET:
4421         case PACKET3_DUMP_CONST_RAM:
4422         case PACKET3_INCREMENT_CE_COUNTER:
4423         case PACKET3_WAIT_ON_DE_COUNTER:
4424         case PACKET3_CE_WRITE:
4425                 break;
4426         default:
4427                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4428                 return -EINVAL;
4429         }
4430         return 0;
4431 }
4432
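/**
 * si_vm_packet3_cp_dma_check - validate a CP_DMA packet
 *
 * @ib: the IB dwords
 * @idx: index of the packet body within the IB
 *
 * When the CP_DMA source or destination address space is a register,
 * verify that every register touched by the transfer is allowed (SI).
 * Returns 0 if the packet is valid, -EINVAL otherwise.
 */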
4433 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4434 {
4435         u32 start_reg, reg, i;
4436         u32 command = ib[idx + 4];
4437         u32 info = ib[idx + 1];
4438         u32 idx_value = ib[idx];
4439         if (command & PACKET3_CP_DMA_CMD_SAS) {
4440                 /* src address space is register */
4441                 if (((info & 0x60000000) >> 29) == 0) {
4442                         start_reg = idx_value << 2;
4443                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4444                                 reg = start_reg;
4445                                 if (!si_vm_reg_valid(reg)) {
4446                                         DRM_ERROR("CP DMA Bad SRC register\n");
4447                                         return -EINVAL;
4448                                 }
4449                         } else {
4450                                 for (i = 0; i < (command & 0x1fffff); i++) {
4451                                         reg = start_reg + (4 * i);
4452                                         if (!si_vm_reg_valid(reg)) {
4453                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4454                                                 return -EINVAL;
4455                                         }
4456                                 }
4457                         }
4458                 }
4459         }
4460         if (command & PACKET3_CP_DMA_CMD_DAS) {
4461                 /* dst address space is register */
4462                 if (((info & 0x00300000) >> 20) == 0) {
4463                         start_reg = ib[idx + 2];
4464                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4465                                 reg = start_reg;
4466                                 if (!si_vm_reg_valid(reg)) {
4467                                         DRM_ERROR("CP DMA Bad DST register\n");
4468                                         return -EINVAL;
4469                                 }
4470                         } else {
4471                                 for (i = 0; i < (command & 0x1fffff); i++) {
4472                                         reg = start_reg + (4 * i);
4473                                         if (!si_vm_reg_valid(reg)) {
4474                                                 DRM_ERROR("CP DMA Bad DST register\n");
4475                                                 return -EINVAL;
4476                                         }
4477                                 }
4478                         }
4479                 }
4480         }
4481         return 0;
4482 }
4483
4484 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4485                                    u32 *ib, struct radeon_cs_packet *pkt)
4486 {
4487         int r;
4488         u32 idx = pkt->idx + 1;
4489         u32 idx_value = ib[idx];
4490         u32 start_reg, end_reg, reg, i;
4491
4492         switch (pkt->opcode) {
4493         case PACKET3_NOP:
4494         case PACKET3_SET_BASE:
4495         case PACKET3_CLEAR_STATE:
4496         case PACKET3_INDEX_BUFFER_SIZE:
4497         case PACKET3_DISPATCH_DIRECT:
4498         case PACKET3_DISPATCH_INDIRECT:
4499         case PACKET3_ALLOC_GDS:
4500         case PACKET3_WRITE_GDS_RAM:
4501         case PACKET3_ATOMIC_GDS:
4502         case PACKET3_ATOMIC:
4503         case PACKET3_OCCLUSION_QUERY:
4504         case PACKET3_SET_PREDICATION:
4505         case PACKET3_COND_EXEC:
4506         case PACKET3_PRED_EXEC:
4507         case PACKET3_DRAW_INDIRECT:
4508         case PACKET3_DRAW_INDEX_INDIRECT:
4509         case PACKET3_INDEX_BASE:
4510         case PACKET3_DRAW_INDEX_2:
4511         case PACKET3_CONTEXT_CONTROL:
4512         case PACKET3_INDEX_TYPE:
4513         case PACKET3_DRAW_INDIRECT_MULTI:
4514         case PACKET3_DRAW_INDEX_AUTO:
4515         case PACKET3_DRAW_INDEX_IMMD:
4516         case PACKET3_NUM_INSTANCES:
4517         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4518         case PACKET3_STRMOUT_BUFFER_UPDATE:
4519         case PACKET3_DRAW_INDEX_OFFSET_2:
4520         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4521         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4522         case PACKET3_MPEG_INDEX:
4523         case PACKET3_WAIT_REG_MEM:
4524         case PACKET3_MEM_WRITE:
4525         case PACKET3_PFP_SYNC_ME:
4526         case PACKET3_SURFACE_SYNC:
4527         case PACKET3_EVENT_WRITE:
4528         case PACKET3_EVENT_WRITE_EOP:
4529         case PACKET3_EVENT_WRITE_EOS:
4530         case PACKET3_SET_CONTEXT_REG:
4531         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4532         case PACKET3_SET_SH_REG:
4533         case PACKET3_SET_SH_REG_OFFSET:
4534         case PACKET3_INCREMENT_DE_COUNTER:
4535         case PACKET3_WAIT_ON_CE_COUNTER:
4536         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4537         case PACKET3_ME_WRITE:
4538                 break;
4539         case PACKET3_COPY_DATA:
4540                 if ((idx_value & 0xf00) == 0) {
4541                         reg = ib[idx + 3] * 4;
4542                         if (!si_vm_reg_valid(reg))
4543                                 return -EINVAL;
4544                 }
4545                 break;
4546         case PACKET3_WRITE_DATA:
4547                 if ((idx_value & 0xf00) == 0) {
4548                         start_reg = ib[idx + 1] * 4;
4549                         if (idx_value & 0x10000) {
4550                                 if (!si_vm_reg_valid(start_reg))
4551                                         return -EINVAL;
4552                         } else {
4553                                 for (i = 0; i < (pkt->count - 2); i++) {
4554                                         reg = start_reg + (4 * i);
4555                                         if (!si_vm_reg_valid(reg))
4556                                                 return -EINVAL;
4557                                 }
4558                         }
4559                 }
4560                 break;
4561         case PACKET3_COND_WRITE:
4562                 if (idx_value & 0x100) {
4563                         reg = ib[idx + 5] * 4;
4564                         if (!si_vm_reg_valid(reg))
4565                                 return -EINVAL;
4566                 }
4567                 break;
4568         case PACKET3_COPY_DW:
4569                 if (idx_value & 0x2) {
4570                         reg = ib[idx + 3] * 4;
4571                         if (!si_vm_reg_valid(reg))
4572                                 return -EINVAL;
4573                 }
4574                 break;
4575         case PACKET3_SET_CONFIG_REG:
4576                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4577                 end_reg = 4 * pkt->count + start_reg - 4;
4578                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4579                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4580                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4581                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4582                         return -EINVAL;
4583                 }
4584                 for (i = 0; i < pkt->count; i++) {
4585                         reg = start_reg + (4 * i);
4586                         if (!si_vm_reg_valid(reg))
4587                                 return -EINVAL;
4588                 }
4589                 break;
4590         case PACKET3_CP_DMA:
4591                 r = si_vm_packet3_cp_dma_check(ib, idx);
4592                 if (r)
4593                         return r;
4594                 break;
4595         default:
4596                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4597                 return -EINVAL;
4598         }
4599         return 0;
4600 }
4601
4602 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4603                                        u32 *ib, struct radeon_cs_packet *pkt)
4604 {
4605         int r;
4606         u32 idx = pkt->idx + 1;
4607         u32 idx_value = ib[idx];
4608         u32 start_reg, reg, i;
4609
4610         switch (pkt->opcode) {
4611         case PACKET3_NOP:
4612         case PACKET3_SET_BASE:
4613         case PACKET3_CLEAR_STATE:
4614         case PACKET3_DISPATCH_DIRECT:
4615         case PACKET3_DISPATCH_INDIRECT:
4616         case PACKET3_ALLOC_GDS:
4617         case PACKET3_WRITE_GDS_RAM:
4618         case PACKET3_ATOMIC_GDS:
4619         case PACKET3_ATOMIC:
4620         case PACKET3_OCCLUSION_QUERY:
4621         case PACKET3_SET_PREDICATION:
4622         case PACKET3_COND_EXEC:
4623         case PACKET3_PRED_EXEC:
4624         case PACKET3_CONTEXT_CONTROL:
4625         case PACKET3_STRMOUT_BUFFER_UPDATE:
4626         case PACKET3_WAIT_REG_MEM:
4627         case PACKET3_MEM_WRITE:
4628         case PACKET3_PFP_SYNC_ME:
4629         case PACKET3_SURFACE_SYNC:
4630         case PACKET3_EVENT_WRITE:
4631         case PACKET3_EVENT_WRITE_EOP:
4632         case PACKET3_EVENT_WRITE_EOS:
4633         case PACKET3_SET_CONTEXT_REG:
4634         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4635         case PACKET3_SET_SH_REG:
4636         case PACKET3_SET_SH_REG_OFFSET:
4637         case PACKET3_INCREMENT_DE_COUNTER:
4638         case PACKET3_WAIT_ON_CE_COUNTER:
4639         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4640         case PACKET3_ME_WRITE:
4641                 break;
4642         case PACKET3_COPY_DATA:
4643                 if ((idx_value & 0xf00) == 0) {
4644                         reg = ib[idx + 3] * 4;
4645                         if (!si_vm_reg_valid(reg))
4646                                 return -EINVAL;
4647                 }
4648                 break;
4649         case PACKET3_WRITE_DATA:
4650                 if ((idx_value & 0xf00) == 0) {
4651                         start_reg = ib[idx + 1] * 4;
4652                         if (idx_value & 0x10000) {
4653                                 if (!si_vm_reg_valid(start_reg))
4654                                         return -EINVAL;
4655                         } else {
4656                                 for (i = 0; i < (pkt->count - 2); i++) {
4657                                         reg = start_reg + (4 * i);
4658                                         if (!si_vm_reg_valid(reg))
4659                                                 return -EINVAL;
4660                                 }
4661                         }
4662                 }
4663                 break;
4664         case PACKET3_COND_WRITE:
4665                 if (idx_value & 0x100) {
4666                         reg = ib[idx + 5] * 4;
4667                         if (!si_vm_reg_valid(reg))
4668                                 return -EINVAL;
4669                 }
4670                 break;
4671         case PACKET3_COPY_DW:
4672                 if (idx_value & 0x2) {
4673                         reg = ib[idx + 3] * 4;
4674                         if (!si_vm_reg_valid(reg))
4675                                 return -EINVAL;
4676                 }
4677                 break;
4678         case PACKET3_CP_DMA:
4679                 r = si_vm_packet3_cp_dma_check(ib, idx);
4680                 if (r)
4681                         return r;
4682                 break;
4683         default:
4684                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4685                 return -EINVAL;
4686         }
4687         return 0;
4688 }
4689
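/**
 * si_ib_parse - validate an IB submitted by a VM client
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to validate
 *
 * Walk the packets in the IB and check them against the CE, gfx or
 * compute rules depending on the target ring; type 0 packets are
 * rejected.  On error the offending IB is dumped (SI).
 * Returns 0 if the IB is valid, error otherwise.
 */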
4690 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4691 {
4692         int ret = 0;
4693         u32 idx = 0, i;
4694         struct radeon_cs_packet pkt;
4695
4696         do {
4697                 pkt.idx = idx;
4698                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4699                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4700                 pkt.one_reg_wr = 0;
4701                 switch (pkt.type) {
4702                 case RADEON_PACKET_TYPE0:
4703                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4704                         ret = -EINVAL;
4705                         break;
4706                 case RADEON_PACKET_TYPE2:
4707                         idx += 1;
4708                         break;
4709                 case RADEON_PACKET_TYPE3:
4710                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4711                         if (ib->is_const_ib)
4712                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4713                         else {
4714                                 switch (ib->ring) {
4715                                 case RADEON_RING_TYPE_GFX_INDEX:
4716                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4717                                         break;
4718                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4719                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4720                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4721                                         break;
4722                                 default:
4723                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4724                                         ret = -EINVAL;
4725                                         break;
4726                                 }
4727                         }
4728                         idx += pkt.count + 2;
4729                         break;
4730                 default:
4731                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4732                         ret = -EINVAL;
4733                         break;
4734                 }
4735                 if (ret) {
4736                         for (i = 0; i < ib->length_dw; i++) {
4737                                 if (i == idx)
4738                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4739                                 else
4740                                         printk("\t0x%08x\n", ib->ptr[i]);
4741                         }
4742                         break;
4743                 }
4744         } while (idx < ib->length_dw);
4745
4746         return ret;
4747 }
4748
4749 /*
4750  * vm
4751  */
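/**
 * si_vm_init - initialize the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * SI provides 16 VM contexts and uses no VRAM base offset (SI).
 * Returns 0.
 */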
4752 int si_vm_init(struct radeon_device *rdev)
4753 {
4754         /* number of VMs */
4755         rdev->vm_manager.nvm = 16;
4756         /* base offset of vram pages */
4757         rdev->vm_manager.vram_base_offset = 0;
4758
4759         return 0;
4760 }
4761
4762 void si_vm_fini(struct radeon_device *rdev)
4763 {
4764 }
4765
4766 /**
4767  * si_vm_decode_fault - print human readable fault info
4768  *
4769  * @rdev: radeon_device pointer
4770  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4771  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4772  *
4773  * Print human readable fault information (SI).
4774  */
4775 static void si_vm_decode_fault(struct radeon_device *rdev,
4776                                u32 status, u32 addr)
4777 {
4778         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4779         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4780         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4781         char *block;
4782
4783         if (rdev->family == CHIP_TAHITI) {
4784                 switch (mc_id) {
4785                 case 160:
4786                 case 144:
4787                 case 96:
4788                 case 80:
4789                 case 224:
4790                 case 208:
4791                 case 32:
4792                 case 16:
4793                         block = "CB";
4794                         break;
4795                 case 161:
4796                 case 145:
4797                 case 97:
4798                 case 81:
4799                 case 225:
4800                 case 209:
4801                 case 33:
4802                 case 17:
4803                         block = "CB_FMASK";
4804                         break;
4805                 case 162:
4806                 case 146:
4807                 case 98:
4808                 case 82:
4809                 case 226:
4810                 case 210:
4811                 case 34:
4812                 case 18:
4813                         block = "CB_CMASK";
4814                         break;
4815                 case 163:
4816                 case 147:
4817                 case 99:
4818                 case 83:
4819                 case 227:
4820                 case 211:
4821                 case 35:
4822                 case 19:
4823                         block = "CB_IMMED";
4824                         break;
4825                 case 164:
4826                 case 148:
4827                 case 100:
4828                 case 84:
4829                 case 228:
4830                 case 212:
4831                 case 36:
4832                 case 20:
4833                         block = "DB";
4834                         break;
4835                 case 165:
4836                 case 149:
4837                 case 101:
4838                 case 85:
4839                 case 229:
4840                 case 213:
4841                 case 37:
4842                 case 21:
4843                         block = "DB_HTILE";
4844                         break;
4845                 case 167:
4846                 case 151:
4847                 case 103:
4848                 case 87:
4849                 case 231:
4850                 case 215:
4851                 case 39:
4852                 case 23:
4853                         block = "DB_STEN";
4854                         break;
4855                 case 72:
4856                 case 68:
4857                 case 64:
4858                 case 8:
4859                 case 4:
4860                 case 0:
4861                 case 136:
4862                 case 132:
4863                 case 128:
4864                 case 200:
4865                 case 196:
4866                 case 192:
4867                         block = "TC";
4868                         break;
4869                 case 112:
4870                 case 48:
4871                         block = "CP";
4872                         break;
4873                 case 49:
4874                 case 177:
4875                 case 50:
4876                 case 178:
4877                         block = "SH";
4878                         break;
4879                 case 53:
4880                 case 190:
4881                         block = "VGT";
4882                         break;
4883                 case 117:
4884                         block = "IH";
4885                         break;
4886                 case 51:
4887                 case 115:
4888                         block = "RLC";
4889                         break;
4890                 case 119:
4891                 case 183:
4892                         block = "DMA0";
4893                         break;
4894                 case 61:
4895                         block = "DMA1";
4896                         break;
4897                 case 248:
4898                 case 120:
4899                         block = "HDP";
4900                         break;
4901                 default:
4902                         block = "unknown";
4903                         break;
4904                 }
4905         } else {
4906                 switch (mc_id) {
4907                 case 32:
4908                 case 16:
4909                 case 96:
4910                 case 80:
4911                 case 160:
4912                 case 144:
4913                 case 224:
4914                 case 208:
4915                         block = "CB";
4916                         break;
4917                 case 33:
4918                 case 17:
4919                 case 97:
4920                 case 81:
4921                 case 161:
4922                 case 145:
4923                 case 225:
4924                 case 209:
4925                         block = "CB_FMASK";
4926                         break;
4927                 case 34:
4928                 case 18:
4929                 case 98:
4930                 case 82:
4931                 case 162:
4932                 case 146:
4933                 case 226:
4934                 case 210:
4935                         block = "CB_CMASK";
4936                         break;
4937                 case 35:
4938                 case 19:
4939                 case 99:
4940                 case 83:
4941                 case 163:
4942                 case 147:
4943                 case 227:
4944                 case 211:
4945                         block = "CB_IMMED";
4946                         break;
4947                 case 36:
4948                 case 20:
4949                 case 100:
4950                 case 84:
4951                 case 164:
4952                 case 148:
4953                 case 228:
4954                 case 212:
4955                         block = "DB";
4956                         break;
4957                 case 37:
4958                 case 21:
4959                 case 101:
4960                 case 85:
4961                 case 165:
4962                 case 149:
4963                 case 229:
4964                 case 213:
4965                         block = "DB_HTILE";
4966                         break;
4967                 case 39:
4968                 case 23:
4969                 case 103:
4970                 case 87:
4971                 case 167:
4972                 case 151:
4973                 case 231:
4974                 case 215:
4975                         block = "DB_STEN";
4976                         break;
4977                 case 72:
4978                 case 68:
4979                 case 8:
4980                 case 4:
4981                 case 136:
4982                 case 132:
4983                 case 200:
4984                 case 196:
4985                         block = "TC";
4986                         break;
4987                 case 112:
4988                 case 48:
4989                         block = "CP";
4990                         break;
4991                 case 49:
4992                 case 177:
4993                 case 50:
4994                 case 178:
4995                         block = "SH";
4996                         break;
4997                 case 53:
4998                         block = "VGT";
4999                         break;
5000                 case 117:
5001                         block = "IH";
5002                         break;
5003                 case 51:
5004                 case 115:
5005                         block = "RLC";
5006                         break;
5007                 case 119:
5008                 case 183:
5009                         block = "DMA0";
5010                         break;
5011                 case 61:
5012                         block = "DMA1";
5013                         break;
5014                 case 248:
5015                 case 120:
5016                         block = "HDP";
5017                         break;
5018                 default:
5019                         block = "unknown";
5020                         break;
5021                 }
5022         }
5023
5024         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5025                protections, vmid, addr,
5026                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5027                block, mc_id);
5028 }
5029
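/**
 * si_vm_flush - flush the TLB for a VM using the CP ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM id to flush
 * @pd_addr: page directory base address
 *
 * Writes the new page directory base for the VM, flushes the HDP cache,
 * requests a VM invalidate for this VM id and waits for it to complete,
 * then syncs the PFP to the ME to avoid stale PFP reads.
 */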
5030 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5031                  unsigned vm_id, uint64_t pd_addr)
5032 {
5033         /* write new base address */
5034         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5035         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5036                                  WRITE_DATA_DST_SEL(0)));
5037
5038         if (vm_id < 8) {
5039                 radeon_ring_write(ring,
5040                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5041         } else {
5042                 radeon_ring_write(ring,
5043                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5044         }
5045         radeon_ring_write(ring, 0);
5046         radeon_ring_write(ring, pd_addr >> 12);
5047
5048         /* flush hdp cache */
5049         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5050         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5051                                  WRITE_DATA_DST_SEL(0)));
5052         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5053         radeon_ring_write(ring, 0);
5054         radeon_ring_write(ring, 0x1);
5055
5056         /* bits 0-15 are the VM contexts 0-15 */
5057         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5058         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5059                                  WRITE_DATA_DST_SEL(0)));
5060         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5061         radeon_ring_write(ring, 0);
5062         radeon_ring_write(ring, 1 << vm_id);
5063
5064         /* wait for the invalidate to complete */
5065         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5066         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5067                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5068         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5069         radeon_ring_write(ring, 0);
5070         radeon_ring_write(ring, 0); /* ref */
5071         radeon_ring_write(ring, 0); /* mask */
5072         radeon_ring_write(ring, 0x20); /* poll interval */
5073
5074         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5075         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5076         radeon_ring_write(ring, 0x0);
5077 }
5078
5079 /*
5080  *  Power and clock gating
5081  */
5082 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5083 {
5084         int i;
5085
5086         for (i = 0; i < rdev->usec_timeout; i++) {
5087                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5088                         break;
5089                 udelay(1);
5090         }
5091
5092         for (i = 0; i < rdev->usec_timeout; i++) {
5093                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5094                         break;
5095                 udelay(1);
5096         }
5097 }
5098
5099 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5100                                          bool enable)
5101 {
5102         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5103         u32 mask;
5104         int i;
5105
5106         if (enable)
5107                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5108         else
5109                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5110         WREG32(CP_INT_CNTL_RING0, tmp);
5111
5112         if (!enable) {
5113                 /* read a gfx register */
5114                 tmp = RREG32(DB_DEPTH_INFO);
5115
5116                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5117                 for (i = 0; i < rdev->usec_timeout; i++) {
5118                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5119                                 break;
5120                         udelay(1);
5121                 }
5122         }
5123 }
5124
5125 static void si_set_uvd_dcm(struct radeon_device *rdev,
5126                            bool sw_mode)
5127 {
5128         u32 tmp, tmp2;
5129
5130         tmp = RREG32(UVD_CGC_CTRL);
5131         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5132         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5133
5134         if (sw_mode) {
5135                 tmp &= ~0x7ffff800;
5136                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5137         } else {
5138                 tmp |= 0x7ffff800;
5139                 tmp2 = 0;
5140         }
5141
5142         WREG32(UVD_CGC_CTRL, tmp);
5143         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5144 }
5145
5146 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5147 {
5148         bool hw_mode = true;
5149
5150         if (hw_mode) {
5151                 si_set_uvd_dcm(rdev, false);
5152         } else {
5153                 u32 tmp = RREG32(UVD_CGC_CTRL);
5154                 tmp &= ~DCM;
5155                 WREG32(UVD_CGC_CTRL, tmp);
5156         }
5157 }
5158
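/*
 * Disable the RLC if it is running and wait for its serdes to go idle.
 * Returns the previous RLC_CNTL value so it can be restored later with
 * si_update_rlc().
 */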
5159 static u32 si_halt_rlc(struct radeon_device *rdev)
5160 {
5161         u32 data, orig;
5162
5163         orig = data = RREG32(RLC_CNTL);
5164
5165         if (data & RLC_ENABLE) {
5166                 data &= ~RLC_ENABLE;
5167                 WREG32(RLC_CNTL, data);
5168
5169                 si_wait_for_rlc_serdes(rdev);
5170         }
5171
5172         return orig;
5173 }
5174
5175 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5176 {
5177         u32 tmp;
5178
5179         tmp = RREG32(RLC_CNTL);
5180         if (tmp != rlc)
5181                 WREG32(RLC_CNTL, rlc);
5182 }
5183
5184 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5185 {
5186         u32 data, orig;
5187
5188         orig = data = RREG32(DMA_PG);
5189         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5190                 data |= PG_CNTL_ENABLE;
5191         else
5192                 data &= ~PG_CNTL_ENABLE;
5193         if (orig != data)
5194                 WREG32(DMA_PG, data);
5195 }
5196
5197 static void si_init_dma_pg(struct radeon_device *rdev)
5198 {
5199         u32 tmp;
5200
5201         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5202         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5203
5204         for (tmp = 0; tmp < 5; tmp++)
5205                 WREG32(DMA_PGFSM_WRITE, 0);
5206 }
5207
5208 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5209                                bool enable)
5210 {
5211         u32 tmp;
5212
5213         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5214                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5215                 WREG32(RLC_TTOP_D, tmp);
5216
5217                 tmp = RREG32(RLC_PG_CNTL);
5218                 tmp |= GFX_PG_ENABLE;
5219                 WREG32(RLC_PG_CNTL, tmp);
5220
5221                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5222                 tmp |= AUTO_PG_EN;
5223                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5224         } else {
5225                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5226                 tmp &= ~AUTO_PG_EN;
5227                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5228
5229                 tmp = RREG32(DB_RENDER_CONTROL);
5230         }
5231 }
5232
5233 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5234 {
5235         u32 tmp;
5236
5237         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5238
5239         tmp = RREG32(RLC_PG_CNTL);
5240         tmp |= GFX_PG_SRC;
5241         WREG32(RLC_PG_CNTL, tmp);
5242
5243         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5244
5245         tmp = RREG32(RLC_AUTO_PG_CTRL);
5246
5247         tmp &= ~GRBM_REG_SGIT_MASK;
5248         tmp |= GRBM_REG_SGIT(0x700);
5249         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5250         WREG32(RLC_AUTO_PG_CTRL, tmp);
5251 }
5252
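/*
 * Return a bitmap of the active CUs for the given shader engine/shader
 * array, derived from CC_GC_SHADER_ARRAY_CONFIG and
 * GC_USER_SHADER_ARRAY_CONFIG and masked to max_cu_per_sh bits.
 */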
5253 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5254 {
5255         u32 mask = 0, tmp, tmp1;
5256         int i;
5257
5258         si_select_se_sh(rdev, se, sh);
5259         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5260         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5261         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5262
5263         tmp &= 0xffff0000;
5264
5265         tmp |= tmp1;
5266         tmp >>= 16;
5267
5268                 for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5269                 mask <<= 1;
5270                 mask |= 1;
5271         }
5272
5273         return (~tmp) & mask;
5274 }
5275
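/*
 * Program the RLC always-on CU mask: keep at most two CUs per shader
 * array in the always-on set and program RLC_MAX_PG_CU with the total
 * number of active CUs.
 */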
5276 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5277 {
5278         u32 i, j, k, active_cu_number = 0;
5279         u32 mask, counter, cu_bitmap;
5280         u32 tmp = 0;
5281
5282         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5283                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5284                         mask = 1;
5285                         cu_bitmap = 0;
5286                         counter  = 0;
5287                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5288                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5289                                         if (counter < 2)
5290                                                 cu_bitmap |= mask;
5291                                         counter++;
5292                                 }
5293                                 mask <<= 1;
5294                         }
5295
5296                         active_cu_number += counter;
5297                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5298                 }
5299         }
5300
5301         WREG32(RLC_PG_AO_CU_MASK, tmp);
5302
5303         tmp = RREG32(RLC_MAX_PG_CU);
5304         tmp &= ~MAX_PU_CU_MASK;
5305         tmp |= MAX_PU_CU(active_cu_number);
5306         WREG32(RLC_MAX_PG_CU, tmp);
5307 }
5308
5309 static void si_enable_cgcg(struct radeon_device *rdev,
5310                            bool enable)
5311 {
5312         u32 data, orig, tmp;
5313
5314         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5315
5316         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5317                 si_enable_gui_idle_interrupt(rdev, true);
5318
5319                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5320
5321                 tmp = si_halt_rlc(rdev);
5322
5323                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5324                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5325                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5326
5327                 si_wait_for_rlc_serdes(rdev);
5328
5329                 si_update_rlc(rdev, tmp);
5330
5331                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5332
5333                 data |= CGCG_EN | CGLS_EN;
5334         } else {
5335                 si_enable_gui_idle_interrupt(rdev, false);
5336
5337                 RREG32(CB_CGTT_SCLK_CTRL);
5338                 RREG32(CB_CGTT_SCLK_CTRL);
5339                 RREG32(CB_CGTT_SCLK_CTRL);
5340                 RREG32(CB_CGTT_SCLK_CTRL);
5341
5342                 data &= ~(CGCG_EN | CGLS_EN);
5343         }
5344
5345         if (orig != data)
5346                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5347 }
5348
5349 static void si_enable_mgcg(struct radeon_device *rdev,
5350                            bool enable)
5351 {
5352         u32 data, orig, tmp = 0;
5353
5354         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5355                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5356                 data = 0x96940200;
5357                 if (orig != data)
5358                         WREG32(CGTS_SM_CTRL_REG, data);
5359
5360                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5361                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5362                         data |= CP_MEM_LS_EN;
5363                         if (orig != data)
5364                                 WREG32(CP_MEM_SLP_CNTL, data);
5365                 }
5366
5367                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5368                 data &= 0xffffffc0;
5369                 if (orig != data)
5370                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5371
5372                 tmp = si_halt_rlc(rdev);
5373
5374                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5375                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5376                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5377
5378                 si_update_rlc(rdev, tmp);
5379         } else {
5380                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5381                 data |= 0x00000003;
5382                 if (orig != data)
5383                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5384
5385                 data = RREG32(CP_MEM_SLP_CNTL);
5386                 if (data & CP_MEM_LS_EN) {
5387                         data &= ~CP_MEM_LS_EN;
5388                         WREG32(CP_MEM_SLP_CNTL, data);
5389                 }
5390                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5391                 data |= LS_OVERRIDE | OVERRIDE;
5392                 if (orig != data)
5393                         WREG32(CGTS_SM_CTRL_REG, data);
5394
5395                 tmp = si_halt_rlc(rdev);
5396
5397                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5398                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5399                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5400
5401                 si_update_rlc(rdev, tmp);
5402         }
5403 }
5404
5405 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5406                                bool enable)
5407 {
5408         u32 orig, data, tmp;
5409
5410         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5411                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5412                 tmp |= 0x3fff;
5413                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5414
5415                 orig = data = RREG32(UVD_CGC_CTRL);
5416                 data |= DCM;
5417                 if (orig != data)
5418                         WREG32(UVD_CGC_CTRL, data);
5419
5420                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5421                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5422         } else {
5423                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5424                 tmp &= ~0x3fff;
5425                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5426
5427                 orig = data = RREG32(UVD_CGC_CTRL);
5428                 data &= ~DCM;
5429                 if (orig != data)
5430                         WREG32(UVD_CGC_CTRL, data);
5431
5432                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5433                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5434         }
5435 }
5436
5437 static const u32 mc_cg_registers[] =
5438 {
5439         MC_HUB_MISC_HUB_CG,
5440         MC_HUB_MISC_SIP_CG,
5441         MC_HUB_MISC_VM_CG,
5442         MC_XPB_CLK_GAT,
5443         ATC_MISC_CG,
5444         MC_CITF_MISC_WR_CG,
5445         MC_CITF_MISC_RD_CG,
5446         MC_CITF_MISC_VM_CG,
5447         VM_L2_CG,
5448 };
5449
5450 static void si_enable_mc_ls(struct radeon_device *rdev,
5451                             bool enable)
5452 {
5453         int i;
5454         u32 orig, data;
5455
5456         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5457                 orig = data = RREG32(mc_cg_registers[i]);
5458                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5459                         data |= MC_LS_ENABLE;
5460                 else
5461                         data &= ~MC_LS_ENABLE;
5462                 if (data != orig)
5463                         WREG32(mc_cg_registers[i], data);
5464         }
5465 }
5466
5467 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5468                                bool enable)
5469 {
5470         int i;
5471         u32 orig, data;
5472
5473         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5474                 orig = data = RREG32(mc_cg_registers[i]);
5475                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5476                         data |= MC_CG_ENABLE;
5477                 else
5478                         data &= ~MC_CG_ENABLE;
5479                 if (data != orig)
5480                         WREG32(mc_cg_registers[i], data);
5481         }
5482 }
5483
5484 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5485                                bool enable)
5486 {
5487         u32 orig, data, offset;
5488         int i;
5489
5490         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5491                 for (i = 0; i < 2; i++) {
5492                         if (i == 0)
5493                                 offset = DMA0_REGISTER_OFFSET;
5494                         else
5495                                 offset = DMA1_REGISTER_OFFSET;
5496                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5497                         data &= ~MEM_POWER_OVERRIDE;
5498                         if (data != orig)
5499                                 WREG32(DMA_POWER_CNTL + offset, data);
5500                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5501                 }
5502         } else {
5503                 for (i = 0; i < 2; i++) {
5504                         if (i == 0)
5505                                 offset = DMA0_REGISTER_OFFSET;
5506                         else
5507                                 offset = DMA1_REGISTER_OFFSET;
5508                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5509                         data |= MEM_POWER_OVERRIDE;
5510                         if (data != orig)
5511                                 WREG32(DMA_POWER_CNTL + offset, data);
5512
5513                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5514                         data = 0xff000000;
5515                         if (data != orig)
5516                                 WREG32(DMA_CLK_CTRL + offset, data);
5517                 }
5518         }
5519 }
5520
5521 static void si_enable_bif_mgls(struct radeon_device *rdev,
5522                                bool enable)
5523 {
5524         u32 orig, data;
5525
5526         orig = data = RREG32_PCIE(PCIE_CNTL2);
5527
5528         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5529                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5530                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5531         else
5532                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5533                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5534
5535         if (orig != data)
5536                 WREG32_PCIE(PCIE_CNTL2, data);
5537 }
5538
5539 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5540                                bool enable)
5541 {
5542         u32 orig, data;
5543
5544         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5545
5546         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5547                 data &= ~CLOCK_GATING_DIS;
5548         else
5549                 data |= CLOCK_GATING_DIS;
5550
5551         if (orig != data)
5552                 WREG32(HDP_HOST_PATH_CNTL, data);
5553 }
5554
5555 static void si_enable_hdp_ls(struct radeon_device *rdev,
5556                              bool enable)
5557 {
5558         u32 orig, data;
5559
5560         orig = data = RREG32(HDP_MEM_POWER_LS);
5561
5562         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5563                 data |= HDP_LS_ENABLE;
5564         else
5565                 data &= ~HDP_LS_ENABLE;
5566
5567         if (orig != data)
5568                 WREG32(HDP_MEM_POWER_LS, data);
5569 }
5570
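/*
 * Enable or disable clock gating for the requested blocks.  For GFX,
 * MGCG is enabled before CGCG and disabled in the reverse order, with
 * the GUI idle interrupt masked while the switch is in progress.
 */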
5571 static void si_update_cg(struct radeon_device *rdev,
5572                          u32 block, bool enable)
5573 {
5574         if (block & RADEON_CG_BLOCK_GFX) {
5575                 si_enable_gui_idle_interrupt(rdev, false);
5576                 /* order matters! */
5577                 if (enable) {
5578                         si_enable_mgcg(rdev, true);
5579                         si_enable_cgcg(rdev, true);
5580                 } else {
5581                         si_enable_cgcg(rdev, false);
5582                         si_enable_mgcg(rdev, false);
5583                 }
5584                 si_enable_gui_idle_interrupt(rdev, true);
5585         }
5586
5587         if (block & RADEON_CG_BLOCK_MC) {
5588                 si_enable_mc_mgcg(rdev, enable);
5589                 si_enable_mc_ls(rdev, enable);
5590         }
5591
5592         if (block & RADEON_CG_BLOCK_SDMA) {
5593                 si_enable_dma_mgcg(rdev, enable);
5594         }
5595
5596         if (block & RADEON_CG_BLOCK_BIF) {
5597                 si_enable_bif_mgls(rdev, enable);
5598         }
5599
5600         if (block & RADEON_CG_BLOCK_UVD) {
5601                 if (rdev->has_uvd) {
5602                         si_enable_uvd_mgcg(rdev, enable);
5603                 }
5604         }
5605
5606         if (block & RADEON_CG_BLOCK_HDP) {
5607                 si_enable_hdp_mgcg(rdev, enable);
5608                 si_enable_hdp_ls(rdev, enable);
5609         }
5610 }
5611
5612 static void si_init_cg(struct radeon_device *rdev)
5613 {
5614         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5615                             RADEON_CG_BLOCK_MC |
5616                             RADEON_CG_BLOCK_SDMA |
5617                             RADEON_CG_BLOCK_BIF |
5618                             RADEON_CG_BLOCK_HDP), true);
5619         if (rdev->has_uvd) {
5620                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5621                 si_init_uvd_internal_cg(rdev);
5622         }
5623 }
5624
5625 static void si_fini_cg(struct radeon_device *rdev)
5626 {
5627         if (rdev->has_uvd) {
5628                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5629         }
5630         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5631                             RADEON_CG_BLOCK_MC |
5632                             RADEON_CG_BLOCK_SDMA |
5633                             RADEON_CG_BLOCK_BIF |
5634                             RADEON_CG_BLOCK_HDP), false);
5635 }
5636
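/**
 * si_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Returns the number of dwords needed for the clear state indirect
 * buffer built by si_get_csb_buffer() (SI).
 */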
5637 u32 si_get_csb_size(struct radeon_device *rdev)
5638 {
5639         u32 count = 0;
5640         const struct cs_section_def *sect = NULL;
5641         const struct cs_extent_def *ext = NULL;
5642
5643         if (rdev->rlc.cs_data == NULL)
5644                 return 0;
5645
5646         /* begin clear state */
5647         count += 2;
5648         /* context control state */
5649         count += 3;
5650
5651         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5652                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5653                         if (sect->id == SECT_CONTEXT)
5654                                 count += 2 + ext->reg_count;
5655                         else
5656                                 return 0;
5657                 }
5658         }
5659         /* pa_sc_raster_config */
5660         count += 3;
5661         /* end clear state */
5662         count += 2;
5663         /* clear state */
5664         count += 2;
5665
5666         return count;
5667 }
5668
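/**
 * si_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer to fill (si_get_csb_size() dwords)
 *
 * Fills the buffer with the clear state packets: preamble, context
 * control, the SECT_CONTEXT register extents, the per-asic
 * PA_SC_RASTER_CONFIG value and the final clear state packet (SI).
 */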
5669 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5670 {
5671         u32 count = 0, i;
5672         const struct cs_section_def *sect = NULL;
5673         const struct cs_extent_def *ext = NULL;
5674
5675         if (rdev->rlc.cs_data == NULL)
5676                 return;
5677         if (buffer == NULL)
5678                 return;
5679
5680         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5681         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5682
5683         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5684         buffer[count++] = cpu_to_le32(0x80000000);
5685         buffer[count++] = cpu_to_le32(0x80000000);
5686
5687         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5688                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5689                         if (sect->id == SECT_CONTEXT) {
5690                                 buffer[count++] =
5691                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5692                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5693                                 for (i = 0; i < ext->reg_count; i++)
5694                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5695                         } else {
5696                                 return;
5697                         }
5698                 }
5699         }
5700
5701         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5702         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5703         switch (rdev->family) {
5704         case CHIP_TAHITI:
5705         case CHIP_PITCAIRN:
5706                 buffer[count++] = cpu_to_le32(0x2a00126a);
5707                 break;
5708         case CHIP_VERDE:
5709                 buffer[count++] = cpu_to_le32(0x0000124a);
5710                 break;
5711         case CHIP_OLAND:
5712                 buffer[count++] = cpu_to_le32(0x00000082);
5713                 break;
5714         case CHIP_HAINAN:
5715                 buffer[count++] = cpu_to_le32(0x00000000);
5716                 break;
5717         default:
5718                 buffer[count++] = cpu_to_le32(0x00000000);
5719                 break;
5720         }
5721
5722         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5723         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5724
5725         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5726         buffer[count++] = cpu_to_le32(0);
5727 }
5728
5729 static void si_init_pg(struct radeon_device *rdev)
5730 {
5731         if (rdev->pg_flags) {
5732                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5733                         si_init_dma_pg(rdev);
5734                 }
5735                 si_init_ao_cu_mask(rdev);
5736                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5737                         si_init_gfx_cgpg(rdev);
5738                 } else {
5739                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5740                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5741                 }
5742                 si_enable_dma_pg(rdev, true);
5743                 si_enable_gfx_cgpg(rdev, true);
5744         } else {
5745                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5746                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5747         }
5748 }
5749
5750 static void si_fini_pg(struct radeon_device *rdev)
5751 {
5752         if (rdev->pg_flags) {
5753                 si_enable_dma_pg(rdev, false);
5754                 si_enable_gfx_cgpg(rdev, false);
5755         }
5756 }
5757
5758 /*
5759  * RLC
5760  */
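/**
 * si_rlc_reset - soft reset the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Toggles SOFT_RESET_RLC in GRBM_SOFT_RESET, with short delays around
 * the toggle, to soft reset the RLC block (SI).
 */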
5761 void si_rlc_reset(struct radeon_device *rdev)
5762 {
5763         u32 tmp = RREG32(GRBM_SOFT_RESET);
5764
5765         tmp |= SOFT_RESET_RLC;
5766         WREG32(GRBM_SOFT_RESET, tmp);
5767         udelay(50);
5768         tmp &= ~SOFT_RESET_RLC;
5769         WREG32(GRBM_SOFT_RESET, tmp);
5770         udelay(50);
5771 }
5772
5773 static void si_rlc_stop(struct radeon_device *rdev)
5774 {
5775         WREG32(RLC_CNTL, 0);
5776
5777         si_enable_gui_idle_interrupt(rdev, false);
5778
5779         si_wait_for_rlc_serdes(rdev);
5780 }
5781
5782 static void si_rlc_start(struct radeon_device *rdev)
5783 {
5784         WREG32(RLC_CNTL, RLC_ENABLE);
5785
5786         si_enable_gui_idle_interrupt(rdev, true);
5787
5788         udelay(50);
5789 }
5790
5791 static bool si_lbpw_supported(struct radeon_device *rdev)
5792 {
5793         u32 tmp;
5794
5795         /* Enable LBPW only for DDR3 */
5796         tmp = RREG32(MC_SEQ_MISC0);
5797         if ((tmp & 0xF0000000) == 0xB0000000)
5798                 return true;
5799         return false;
5800 }
5801
5802 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5803 {
5804         u32 tmp;
5805
5806         tmp = RREG32(RLC_LB_CNTL);
5807         if (enable)
5808                 tmp |= LOAD_BALANCE_ENABLE;
5809         else
5810                 tmp &= ~LOAD_BALANCE_ENABLE;
5811         WREG32(RLC_LB_CNTL, tmp);
5812
5813         if (!enable) {
5814                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5815                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5816         }
5817 }
5818
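/*
 * Stop and reset the RLC, re-initialize power and clock gating, load
 * the RLC microcode (new or legacy firmware layout) and start the RLC.
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */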
5819 static int si_rlc_resume(struct radeon_device *rdev)
5820 {
5821         u32 i;
5822
5823         if (!rdev->rlc_fw)
5824                 return -EINVAL;
5825
5826         si_rlc_stop(rdev);
5827
5828         si_rlc_reset(rdev);
5829
5830         si_init_pg(rdev);
5831
5832         si_init_cg(rdev);
5833
5834         WREG32(RLC_RL_BASE, 0);
5835         WREG32(RLC_RL_SIZE, 0);
5836         WREG32(RLC_LB_CNTL, 0);
5837         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5838         WREG32(RLC_LB_CNTR_INIT, 0);
5839         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5840
5841         WREG32(RLC_MC_CNTL, 0);
5842         WREG32(RLC_UCODE_CNTL, 0);
5843
5844         if (rdev->new_fw) {
5845                 const struct rlc_firmware_header_v1_0 *hdr =
5846                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5847                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5848                 const __le32 *fw_data = (const __le32 *)
5849                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5850
5851                 radeon_ucode_print_rlc_hdr(&hdr->header);
5852
5853                 for (i = 0; i < fw_size; i++) {
5854                         WREG32(RLC_UCODE_ADDR, i);
5855                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5856                 }
5857         } else {
5858                 const __be32 *fw_data =
5859                         (const __be32 *)rdev->rlc_fw->data;
5860                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5861                         WREG32(RLC_UCODE_ADDR, i);
5862                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5863                 }
5864         }
5865         WREG32(RLC_UCODE_ADDR, 0);
5866
5867         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5868
5869         si_rlc_start(rdev);
5870
5871         return 0;
5872 }
5873
5874 static void si_enable_interrupts(struct radeon_device *rdev)
5875 {
5876         u32 ih_cntl = RREG32(IH_CNTL);
5877         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5878
5879         ih_cntl |= ENABLE_INTR;
5880         ih_rb_cntl |= IH_RB_ENABLE;
5881         WREG32(IH_CNTL, ih_cntl);
5882         WREG32(IH_RB_CNTL, ih_rb_cntl);
5883         rdev->ih.enabled = true;
5884 }
5885
5886 static void si_disable_interrupts(struct radeon_device *rdev)
5887 {
5888         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5889         u32 ih_cntl = RREG32(IH_CNTL);
5890
5891         ih_rb_cntl &= ~IH_RB_ENABLE;
5892         ih_cntl &= ~ENABLE_INTR;
5893         WREG32(IH_RB_CNTL, ih_rb_cntl);
5894         WREG32(IH_CNTL, ih_cntl);
5895         /* set rptr, wptr to 0 */
5896         WREG32(IH_RB_RPTR, 0);
5897         WREG32(IH_RB_WPTR, 0);
5898         rdev->ih.enabled = false;
5899         rdev->ih.rptr = 0;
5900 }
5901
5902 static void si_disable_interrupt_state(struct radeon_device *rdev)
5903 {
5904         u32 tmp;
5905
5906         tmp = RREG32(CP_INT_CNTL_RING0) &
5907                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5908         WREG32(CP_INT_CNTL_RING0, tmp);
5909         WREG32(CP_INT_CNTL_RING1, 0);
5910         WREG32(CP_INT_CNTL_RING2, 0);
5911         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5912         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5913         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5914         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5915         WREG32(GRBM_INT_CNTL, 0);
5916         WREG32(SRBM_INT_CNTL, 0);
5917         if (rdev->num_crtc >= 2) {
5918                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5919                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5920         }
5921         if (rdev->num_crtc >= 4) {
5922                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5923                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5924         }
5925         if (rdev->num_crtc >= 6) {
5926                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5927                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5928         }
5929
5930         if (rdev->num_crtc >= 2) {
5931                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5932                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5933         }
5934         if (rdev->num_crtc >= 4) {
5935                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5936                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5937         }
5938         if (rdev->num_crtc >= 6) {
5939                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5940                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5941         }
5942
5943         if (!ASIC_IS_NODCE(rdev)) {
5944                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5945
5946                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5947                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5948                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5949                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5950                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5951                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5952                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5953                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5954                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5955                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5956                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5957                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5958         }
5959 }
5960
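/**
 * si_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring buffer, brings up the RLC, programs the
 * interrupt controller (ring base, size, writeback address) and
 * enables interrupts (SI).  Returns 0 for success, error on failure.
 */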
5961 static int si_irq_init(struct radeon_device *rdev)
5962 {
5963         int ret = 0;
5964         int rb_bufsz;
5965         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5966
5967         /* allocate ring */
5968         ret = r600_ih_ring_alloc(rdev);
5969         if (ret)
5970                 return ret;
5971
5972         /* disable irqs */
5973         si_disable_interrupts(rdev);
5974
5975         /* init rlc */
5976         ret = si_rlc_resume(rdev);
5977         if (ret) {
5978                 r600_ih_ring_fini(rdev);
5979                 return ret;
5980         }
5981
5982         /* setup interrupt control */
5983         /* set dummy read address to ring address */
5984         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5985         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5986         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5987          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5988          */
5989         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5990         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5991         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5992         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5993
5994         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5995         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5996
5997         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5998                       IH_WPTR_OVERFLOW_CLEAR |
5999                       (rb_bufsz << 1));
6000
6001         if (rdev->wb.enabled)
6002                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6003
6004         /* set the writeback address whether it's enabled or not */
6005         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6006         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6007
6008         WREG32(IH_RB_CNTL, ih_rb_cntl);
6009
6010         /* set rptr, wptr to 0 */
6011         WREG32(IH_RB_RPTR, 0);
6012         WREG32(IH_RB_WPTR, 0);
6013
6014         /* Default settings for IH_CNTL (disabled at first) */
6015         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6016         /* RPTR_REARM only works if msi's are enabled */
6017         if (rdev->msi_enabled)
6018                 ih_cntl |= RPTR_REARM;
6019         WREG32(IH_CNTL, ih_cntl);
6020
6021         /* force the active interrupt state to all disabled */
6022         si_disable_interrupt_state(rdev);
6023
6024         pci_set_master(rdev->pdev);
6025
6026         /* enable irqs */
6027         si_enable_interrupts(rdev);
6028
6029         return ret;
6030 }
6031
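/**
 * si_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Programs the interrupt enable bits for the CP rings, DMA rings,
 * vblank, hpd and thermal interrupts based on the current state in
 * rdev->irq (SI).  Returns 0 for success.
 */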
6032 int si_irq_set(struct radeon_device *rdev)
6033 {
6034         u32 cp_int_cntl;
6035         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6036         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6037         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6038         u32 grbm_int_cntl = 0;
6039         u32 dma_cntl, dma_cntl1;
6040         u32 thermal_int = 0;
6041
6042         if (!rdev->irq.installed) {
6043                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6044                 return -EINVAL;
6045         }
6046         /* don't enable anything if the ih is disabled */
6047         if (!rdev->ih.enabled) {
6048                 si_disable_interrupts(rdev);
6049                 /* force the active interrupt state to all disabled */
6050                 si_disable_interrupt_state(rdev);
6051                 return 0;
6052         }
6053
6054         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6055                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6056
6057         if (!ASIC_IS_NODCE(rdev)) {
6058                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6059                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6060                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6061                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6062                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6063                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6064         }
6065
6066         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6067         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6068
6069         thermal_int = RREG32(CG_THERMAL_INT) &
6070                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6071
6072         /* enable CP interrupts on all rings */
6073         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6074                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6075                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6076         }
6077         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6078                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6079                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6080         }
6081         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6082                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6083                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6084         }
6085         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6086                 DRM_DEBUG("si_irq_set: sw int dma\n");
6087                 dma_cntl |= TRAP_ENABLE;
6088         }
6089
6090         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6091                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6092                 dma_cntl1 |= TRAP_ENABLE;
6093         }
6094         if (rdev->irq.crtc_vblank_int[0] ||
6095             atomic_read(&rdev->irq.pflip[0])) {
6096                 DRM_DEBUG("si_irq_set: vblank 0\n");
6097                 crtc1 |= VBLANK_INT_MASK;
6098         }
6099         if (rdev->irq.crtc_vblank_int[1] ||
6100             atomic_read(&rdev->irq.pflip[1])) {
6101                 DRM_DEBUG("si_irq_set: vblank 1\n");
6102                 crtc2 |= VBLANK_INT_MASK;
6103         }
6104         if (rdev->irq.crtc_vblank_int[2] ||
6105             atomic_read(&rdev->irq.pflip[2])) {
6106                 DRM_DEBUG("si_irq_set: vblank 2\n");
6107                 crtc3 |= VBLANK_INT_MASK;
6108         }
6109         if (rdev->irq.crtc_vblank_int[3] ||
6110             atomic_read(&rdev->irq.pflip[3])) {
6111                 DRM_DEBUG("si_irq_set: vblank 3\n");
6112                 crtc4 |= VBLANK_INT_MASK;
6113         }
6114         if (rdev->irq.crtc_vblank_int[4] ||
6115             atomic_read(&rdev->irq.pflip[4])) {
6116                 DRM_DEBUG("si_irq_set: vblank 4\n");
6117                 crtc5 |= VBLANK_INT_MASK;
6118         }
6119         if (rdev->irq.crtc_vblank_int[5] ||
6120             atomic_read(&rdev->irq.pflip[5])) {
6121                 DRM_DEBUG("si_irq_set: vblank 5\n");
6122                 crtc6 |= VBLANK_INT_MASK;
6123         }
6124         if (rdev->irq.hpd[0]) {
6125                 DRM_DEBUG("si_irq_set: hpd 1\n");
6126                 hpd1 |= DC_HPDx_INT_EN;
6127         }
6128         if (rdev->irq.hpd[1]) {
6129                 DRM_DEBUG("si_irq_set: hpd 2\n");
6130                 hpd2 |= DC_HPDx_INT_EN;
6131         }
6132         if (rdev->irq.hpd[2]) {
6133                 DRM_DEBUG("si_irq_set: hpd 3\n");
6134                 hpd3 |= DC_HPDx_INT_EN;
6135         }
6136         if (rdev->irq.hpd[3]) {
6137                 DRM_DEBUG("si_irq_set: hpd 4\n");
6138                 hpd4 |= DC_HPDx_INT_EN;
6139         }
6140         if (rdev->irq.hpd[4]) {
6141                 DRM_DEBUG("si_irq_set: hpd 5\n");
6142                 hpd5 |= DC_HPDx_INT_EN;
6143         }
6144         if (rdev->irq.hpd[5]) {
6145                 DRM_DEBUG("si_irq_set: hpd 6\n");
6146                 hpd6 |= DC_HPDx_INT_EN;
6147         }
6148
6149         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6150         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6151         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6152
6153         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6154         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6155
6156         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6157
6158         if (rdev->irq.dpm_thermal) {
6159                 DRM_DEBUG("dpm thermal\n");
6160                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6161         }
6162
6163         if (rdev->num_crtc >= 2) {
6164                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6165                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6166         }
6167         if (rdev->num_crtc >= 4) {
6168                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6169                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6170         }
6171         if (rdev->num_crtc >= 6) {
6172                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6173                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6174         }
6175
6176         if (rdev->num_crtc >= 2) {
6177                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6178                        GRPH_PFLIP_INT_MASK);
6179                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6180                        GRPH_PFLIP_INT_MASK);
6181         }
6182         if (rdev->num_crtc >= 4) {
6183                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6184                        GRPH_PFLIP_INT_MASK);
6185                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6186                        GRPH_PFLIP_INT_MASK);
6187         }
6188         if (rdev->num_crtc >= 6) {
6189                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6190                        GRPH_PFLIP_INT_MASK);
6191                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6192                        GRPH_PFLIP_INT_MASK);
6193         }
6194
6195         if (!ASIC_IS_NODCE(rdev)) {
6196                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6197                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6198                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6199                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6200                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6201                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6202         }
6203
6204         WREG32(CG_THERMAL_INT, thermal_int);
6205
6206         /* posting read */
6207         RREG32(SRBM_STATUS);
6208
6209         return 0;
6210 }
6211
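/**
 * si_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Latches the display interrupt status registers and acks any pending
 * vblank, vline, pageflip and hotplug interrupts (SI).
 */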
6212 static inline void si_irq_ack(struct radeon_device *rdev)
6213 {
6214         u32 tmp;
6215
6216         if (ASIC_IS_NODCE(rdev))
6217                 return;
6218
6219         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6220         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6221         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6222         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6223         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6224         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6225         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6226         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6227         if (rdev->num_crtc >= 4) {
6228                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6229                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6230         }
6231         if (rdev->num_crtc >= 6) {
6232                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6233                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6234         }
6235
6236         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6237                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6238         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6239                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6240         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6241                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6242         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6243                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6244         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6245                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6246         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6247                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6248
6249         if (rdev->num_crtc >= 4) {
6250                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6251                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6252                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6253                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6254                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6255                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6256                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6257                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6258                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6259                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6260                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6261                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6262         }
6263
6264         if (rdev->num_crtc >= 6) {
6265                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6266                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6267                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6268                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6269                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6270                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6271                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6272                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6273                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6274                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6275                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6276                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6277         }
6278
6279         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6280                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6281                 tmp |= DC_HPDx_INT_ACK;
6282                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6283         }
6284         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6285                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6286                 tmp |= DC_HPDx_INT_ACK;
6287                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6288         }
6289         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6290                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6291                 tmp |= DC_HPDx_INT_ACK;
6292                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6293         }
6294         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6295                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6296                 tmp |= DC_HPDx_INT_ACK;
6297                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6298         }
6299         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6300                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6301                 tmp |= DC_HPDx_INT_ACK;
6302                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6303         }
6304         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6305                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6306                 tmp |= DC_HPDx_INT_ACK;
6307                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6308         }
6309 }
6310
6311 static void si_irq_disable(struct radeon_device *rdev)
6312 {
6313         si_disable_interrupts(rdev);
6314         /* Wait and acknowledge irq */
6315         mdelay(1);
6316         si_irq_ack(rdev);
6317         si_disable_interrupt_state(rdev);
6318 }
6319
6320 static void si_irq_suspend(struct radeon_device *rdev)
6321 {
6322         si_irq_disable(rdev);
6323         si_rlc_stop(rdev);
6324 }
6325
6326 static void si_irq_fini(struct radeon_device *rdev)
6327 {
6328         si_irq_suspend(rdev);
6329         r600_ih_ring_fini(rdev);
6330 }
6331
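/**
 * si_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Reads the IH ring buffer wptr from the writeback page if enabled,
 * otherwise from the register, and handles a ring buffer overflow by
 * resetting the rptr and clearing the overflow flag (SI).
 */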
6332 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6333 {
6334         u32 wptr, tmp;
6335
6336         if (rdev->wb.enabled)
6337                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6338         else
6339                 wptr = RREG32(IH_RB_WPTR);
6340
6341         if (wptr & RB_OVERFLOW) {
6342                 wptr &= ~RB_OVERFLOW;
6343                 /* When a ring buffer overflow happens, start parsing interrupts
6344                  * from the last vector that was not overwritten (wptr + 16).
6345                  * Hopefully this allows us to catch up.
6346                  */
6347                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6348                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6349                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6350                 tmp = RREG32(IH_RB_CNTL);
6351                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6352                 WREG32(IH_RB_CNTL, tmp);
6353         }
6354         return (wptr & rdev->ih.ptr_mask);
6355 }
6356
6357 /* SI IV Ring
6358  * Each IV ring entry is 128 bits:
6359  * [7:0]    - interrupt source id
6360  * [31:8]   - reserved
6361  * [59:32]  - interrupt source data
6362  * [63:60]  - reserved
6363  * [71:64]  - RINGID
6364  * [79:72]  - VMID
6365  * [127:80] - reserved
6366  */
6367 int si_irq_process(struct radeon_device *rdev)
6368 {
6369         u32 wptr;
6370         u32 rptr;
6371         u32 src_id, src_data, ring_id;
6372         u32 ring_index;
6373         bool queue_hotplug = false;
6374         bool queue_thermal = false;
6375         u32 status, addr;
6376
6377         if (!rdev->ih.enabled || rdev->shutdown)
6378                 return IRQ_NONE;
6379
6380         wptr = si_get_ih_wptr(rdev);
6381
6382 restart_ih:
6383         /* is somebody else already processing irqs? */
6384         if (atomic_xchg(&rdev->ih.lock, 1))
6385                 return IRQ_NONE;
6386
6387         rptr = rdev->ih.rptr;
6388         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6389
6390         /* Order reading of wptr vs. reading of IH ring data */
6391         rmb();
6392
6393         /* display interrupts */
6394         si_irq_ack(rdev);
6395
6396         while (rptr != wptr) {
6397                 /* wptr/rptr are in bytes! */
6398                 ring_index = rptr / 4;
6399                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6400                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6401                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6402
6403                 switch (src_id) {
6404                 case 1: /* D1 vblank/vline */
6405                         switch (src_data) {
6406                         case 0: /* D1 vblank */
6407                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6408                                         if (rdev->irq.crtc_vblank_int[0]) {
6409                                                 drm_handle_vblank(rdev->ddev, 0);
6410                                                 rdev->pm.vblank_sync = true;
6411                                                 wake_up(&rdev->irq.vblank_queue);
6412                                         }
6413                                         if (atomic_read(&rdev->irq.pflip[0]))
6414                                                 radeon_crtc_handle_vblank(rdev, 0);
6415                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6416                                         DRM_DEBUG("IH: D1 vblank\n");
6417                                 }
6418                                 break;
6419                         case 1: /* D1 vline */
6420                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6421                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6422                                         DRM_DEBUG("IH: D1 vline\n");
6423                                 }
6424                                 break;
6425                         default:
6426                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6427                                 break;
6428                         }
6429                         break;
6430                 case 2: /* D2 vblank/vline */
6431                         switch (src_data) {
6432                         case 0: /* D2 vblank */
6433                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6434                                         if (rdev->irq.crtc_vblank_int[1]) {
6435                                                 drm_handle_vblank(rdev->ddev, 1);
6436                                                 rdev->pm.vblank_sync = true;
6437                                                 wake_up(&rdev->irq.vblank_queue);
6438                                         }
6439                                         if (atomic_read(&rdev->irq.pflip[1]))
6440                                                 radeon_crtc_handle_vblank(rdev, 1);
6441                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6442                                         DRM_DEBUG("IH: D2 vblank\n");
6443                                 }
6444                                 break;
6445                         case 1: /* D2 vline */
6446                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6447                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6448                                         DRM_DEBUG("IH: D2 vline\n");
6449                                 }
6450                                 break;
6451                         default:
6452                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6453                                 break;
6454                         }
6455                         break;
6456                 case 3: /* D3 vblank/vline */
6457                         switch (src_data) {
6458                         case 0: /* D3 vblank */
6459                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6460                                         if (rdev->irq.crtc_vblank_int[2]) {
6461                                                 drm_handle_vblank(rdev->ddev, 2);
6462                                                 rdev->pm.vblank_sync = true;
6463                                                 wake_up(&rdev->irq.vblank_queue);
6464                                         }
6465                                         if (atomic_read(&rdev->irq.pflip[2]))
6466                                                 radeon_crtc_handle_vblank(rdev, 2);
6467                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6468                                         DRM_DEBUG("IH: D3 vblank\n");
6469                                 }
6470                                 break;
6471                         case 1: /* D3 vline */
6472                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6473                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6474                                         DRM_DEBUG("IH: D3 vline\n");
6475                                 }
6476                                 break;
6477                         default:
6478                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6479                                 break;
6480                         }
6481                         break;
6482                 case 4: /* D4 vblank/vline */
6483                         switch (src_data) {
6484                         case 0: /* D4 vblank */
6485                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6486                                         if (rdev->irq.crtc_vblank_int[3]) {
6487                                                 drm_handle_vblank(rdev->ddev, 3);
6488                                                 rdev->pm.vblank_sync = true;
6489                                                 wake_up(&rdev->irq.vblank_queue);
6490                                         }
6491                                         if (atomic_read(&rdev->irq.pflip[3]))
6492                                                 radeon_crtc_handle_vblank(rdev, 3);
6493                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6494                                         DRM_DEBUG("IH: D4 vblank\n");
6495                                 }
6496                                 break;
6497                         case 1: /* D4 vline */
6498                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6499                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6500                                         DRM_DEBUG("IH: D4 vline\n");
6501                                 }
6502                                 break;
6503                         default:
6504                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6505                                 break;
6506                         }
6507                         break;
6508                 case 5: /* D5 vblank/vline */
6509                         switch (src_data) {
6510                         case 0: /* D5 vblank */
6511                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6512                                         if (rdev->irq.crtc_vblank_int[4]) {
6513                                                 drm_handle_vblank(rdev->ddev, 4);
6514                                                 rdev->pm.vblank_sync = true;
6515                                                 wake_up(&rdev->irq.vblank_queue);
6516                                         }
6517                                         if (atomic_read(&rdev->irq.pflip[4]))
6518                                                 radeon_crtc_handle_vblank(rdev, 4);
6519                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6520                                         DRM_DEBUG("IH: D5 vblank\n");
6521                                 }
6522                                 break;
6523                         case 1: /* D5 vline */
6524                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6525                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6526                                         DRM_DEBUG("IH: D5 vline\n");
6527                                 }
6528                                 break;
6529                         default:
6530                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6531                                 break;
6532                         }
6533                         break;
6534                 case 6: /* D6 vblank/vline */
6535                         switch (src_data) {
6536                         case 0: /* D6 vblank */
6537                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6538                                         if (rdev->irq.crtc_vblank_int[5]) {
6539                                                 drm_handle_vblank(rdev->ddev, 5);
6540                                                 rdev->pm.vblank_sync = true;
6541                                                 wake_up(&rdev->irq.vblank_queue);
6542                                         }
6543                                         if (atomic_read(&rdev->irq.pflip[5]))
6544                                                 radeon_crtc_handle_vblank(rdev, 5);
6545                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6546                                         DRM_DEBUG("IH: D6 vblank\n");
6547                                 }
6548                                 break;
6549                         case 1: /* D6 vline */
6550                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6551                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6552                                         DRM_DEBUG("IH: D6 vline\n");
6553                                 }
6554                                 break;
6555                         default:
6556                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6557                                 break;
6558                         }
6559                         break;
6560                 case 8: /* D1 page flip */
6561                 case 10: /* D2 page flip */
6562                 case 12: /* D3 page flip */
6563                 case 14: /* D4 page flip */
6564                 case 16: /* D5 page flip */
6565                 case 18: /* D6 page flip */
6566                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6567                         if (radeon_use_pflipirq > 0)
6568                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6569                         break;
6570                 case 42: /* HPD hotplug */
6571                         switch (src_data) {
6572                         case 0:
6573                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6574                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6575                                         queue_hotplug = true;
6576                                         DRM_DEBUG("IH: HPD1\n");
6577                                 }
6578                                 break;
6579                         case 1:
6580                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6581                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6582                                         queue_hotplug = true;
6583                                         DRM_DEBUG("IH: HPD2\n");
6584                                 }
6585                                 break;
6586                         case 2:
6587                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6588                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6589                                         queue_hotplug = true;
6590                                         DRM_DEBUG("IH: HPD3\n");
6591                                 }
6592                                 break;
6593                         case 3:
6594                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6595                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6596                                         queue_hotplug = true;
6597                                         DRM_DEBUG("IH: HPD4\n");
6598                                 }
6599                                 break;
6600                         case 4:
6601                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6602                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6603                                         queue_hotplug = true;
6604                                         DRM_DEBUG("IH: HPD5\n");
6605                                 }
6606                                 break;
6607                         case 5:
6608                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6609                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6610                                         queue_hotplug = true;
6611                                         DRM_DEBUG("IH: HPD6\n");
6612                                 }
6613                                 break;
6614                         default:
6615                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6616                                 break;
6617                         }
6618                         break;
6619                 case 96:
6620                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6621                         WREG32(SRBM_INT_ACK, 0x1);
6622                         break;
6623                 case 124: /* UVD */
6624                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6625                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6626                         break;
6627                 case 146:
6628                 case 147:
6629                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6630                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6631                         /* reset addr and status */
6632                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6633                         if (addr == 0x0 && status == 0x0)
6634                                 break;
6635                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6636                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6637                                 addr);
6638                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6639                                 status);
6640                         si_vm_decode_fault(rdev, status, addr);
6641                         break;
6642                 case 176: /* RINGID0 CP_INT */
6643                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6644                         break;
6645                 case 177: /* RINGID1 CP_INT */
6646                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6647                         break;
6648                 case 178: /* RINGID2 CP_INT */
6649                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6650                         break;
6651                 case 181: /* CP EOP event */
6652                         DRM_DEBUG("IH: CP EOP\n");
6653                         switch (ring_id) {
6654                         case 0:
6655                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6656                                 break;
6657                         case 1:
6658                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6659                                 break;
6660                         case 2:
6661                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6662                                 break;
6663                         }
6664                         break;
6665                 case 224: /* DMA trap event */
6666                         DRM_DEBUG("IH: DMA trap\n");
6667                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6668                         break;
6669                 case 230: /* thermal low to high */
6670                         DRM_DEBUG("IH: thermal low to high\n");
6671                         rdev->pm.dpm.thermal.high_to_low = false;
6672                         queue_thermal = true;
6673                         break;
6674                 case 231: /* thermal high to low */
6675                         DRM_DEBUG("IH: thermal high to low\n");
6676                         rdev->pm.dpm.thermal.high_to_low = true;
6677                         queue_thermal = true;
6678                         break;
6679                 case 233: /* GUI IDLE */
6680                         DRM_DEBUG("IH: GUI idle\n");
6681                         break;
6682                 case 244: /* DMA1 trap event */
6683                         DRM_DEBUG("IH: DMA1 trap\n");
6684                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6685                         break;
6686                 default:
6687                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6688                         break;
6689                 }
6690
6691                 /* wptr/rptr are in bytes! */
6692                 rptr += 16;
6693                 rptr &= rdev->ih.ptr_mask;
6694                 WREG32(IH_RB_RPTR, rptr);
6695         }
6696         if (queue_hotplug)
6697                 schedule_work(&rdev->hotplug_work);
6698         if (queue_thermal && rdev->pm.dpm_enabled)
6699                 schedule_work(&rdev->pm.dpm.thermal.work);
6700         rdev->ih.rptr = rptr;
6701         atomic_set(&rdev->ih.lock, 0);
6702
6703         /* make sure wptr hasn't changed while processing */
6704         wptr = si_get_ih_wptr(rdev);
6705         if (wptr != rptr)
6706                 goto restart_ih;
6707
6708         return IRQ_HANDLED;
6709 }
6710
6711 /*
6712  * startup/shutdown callbacks
6713  */
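/**
 * si_startup - program the asic and bring it to a working state (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, loads the microcode, enables the GART, and
 * initializes the RLC, writeback, fences, interrupts, CP/DMA/UVD
 * rings, IB pool, VM manager and audio. Used by both init and resume.
 * Returns 0 on success, error on failure.
 */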
6714 static int si_startup(struct radeon_device *rdev)
6715 {
6716         struct radeon_ring *ring;
6717         int r;
6718
6719         /* enable pcie gen2/3 link */
6720         si_pcie_gen3_enable(rdev);
6721         /* enable aspm */
6722         si_program_aspm(rdev);
6723
6724         /* scratch needs to be initialized before MC */
6725         r = r600_vram_scratch_init(rdev);
6726         if (r)
6727                 return r;
6728
6729         si_mc_program(rdev);
6730
6731         if (!rdev->pm.dpm_enabled) {
6732                 r = si_mc_load_microcode(rdev);
6733                 if (r) {
6734                         DRM_ERROR("Failed to load MC firmware!\n");
6735                         return r;
6736                 }
6737         }
6738
6739         r = si_pcie_gart_enable(rdev);
6740         if (r)
6741                 return r;
6742         si_gpu_init(rdev);
6743
6744         /* allocate rlc buffers */
6745         if (rdev->family == CHIP_VERDE) {
6746                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6747                 rdev->rlc.reg_list_size =
6748                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6749         }
6750         rdev->rlc.cs_data = si_cs_data;
6751         r = sumo_rlc_init(rdev);
6752         if (r) {
6753                 DRM_ERROR("Failed to init rlc BOs!\n");
6754                 return r;
6755         }
6756
6757         /* allocate wb buffer */
6758         r = radeon_wb_init(rdev);
6759         if (r)
6760                 return r;
6761
6762         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6763         if (r) {
6764                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6765                 return r;
6766         }
6767
6768         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6769         if (r) {
6770                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6771                 return r;
6772         }
6773
6774         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6775         if (r) {
6776                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6777                 return r;
6778         }
6779
6780         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6781         if (r) {
6782                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6783                 return r;
6784         }
6785
6786         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6787         if (r) {
6788                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6789                 return r;
6790         }
6791
6792         if (rdev->has_uvd) {
6793                 r = uvd_v2_2_resume(rdev);
6794                 if (!r) {
6795                         r = radeon_fence_driver_start_ring(rdev,
6796                                                            R600_RING_TYPE_UVD_INDEX);
6797                         if (r)
6798                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6799                 }
6800                 if (r)
6801                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6802         }
6803
6804         /* Enable IRQ */
6805         if (!rdev->irq.installed) {
6806                 r = radeon_irq_kms_init(rdev);
6807                 if (r)
6808                         return r;
6809         }
6810
6811         r = si_irq_init(rdev);
6812         if (r) {
6813                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6814                 radeon_irq_kms_fini(rdev);
6815                 return r;
6816         }
6817         si_irq_set(rdev);
6818
6819         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6820         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6821                              RADEON_CP_PACKET2);
6822         if (r)
6823                 return r;
6824
6825         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6826         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6827                              RADEON_CP_PACKET2);
6828         if (r)
6829                 return r;
6830
6831         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6832         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6833                              RADEON_CP_PACKET2);
6834         if (r)
6835                 return r;
6836
6837         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6838         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6839                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6840         if (r)
6841                 return r;
6842
6843         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6844         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6845                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6846         if (r)
6847                 return r;
6848
6849         r = si_cp_load_microcode(rdev);
6850         if (r)
6851                 return r;
6852         r = si_cp_resume(rdev);
6853         if (r)
6854                 return r;
6855
6856         r = cayman_dma_resume(rdev);
6857         if (r)
6858                 return r;
6859
6860         if (rdev->has_uvd) {
6861                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6862                 if (ring->ring_size) {
6863                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6864                                              RADEON_CP_PACKET2);
6865                         if (!r)
6866                                 r = uvd_v1_0_init(rdev);
6867                         if (r)
6868                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6869                 }
6870         }
6871
6872         r = radeon_ib_pool_init(rdev);
6873         if (r) {
6874                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6875                 return r;
6876         }
6877
6878         r = radeon_vm_manager_init(rdev);
6879         if (r) {
6880                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6881                 return r;
6882         }
6883
6884         r = radeon_audio_init(rdev);
6885         if (r)
6886                 return r;
6887
6888         return 0;
6889 }
6890
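/**
 * si_resume - resume the asic (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Posts the card via the ATOM BIOS, restores the golden registers,
 * resumes dpm power management when enabled and restarts the asic
 * via si_startup().
 * Returns 0 on success, error on failure.
 */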
6891 int si_resume(struct radeon_device *rdev)
6892 {
6893         int r;
6894
6895         /* Do not reset the GPU before posting; on rv770 hw, unlike on r500 hw,
6896          * posting will perform the necessary tasks to bring the GPU back into
6897          * good shape.
6898          */
6899         /* post card */
6900         atom_asic_init(rdev->mode_info.atom_context);
6901
6902         /* init golden registers */
6903         si_init_golden_registers(rdev);
6904
6905         if (rdev->pm.pm_method == PM_METHOD_DPM)
6906                 radeon_pm_resume(rdev);
6907
6908         rdev->accel_working = true;
6909         r = si_startup(rdev);
6910         if (r) {
6911                 DRM_ERROR("si startup failed on resume\n");
6912                 rdev->accel_working = false;
6913                 return r;
6914         }
6915
6916         return r;
6917
6918 }
6919
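/**
 * si_suspend - bring the hw down in preparation for suspend (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Suspends power management and audio, tears down the VM manager and
 * stops the CP, DMA engines, UVD, PG/CG, interrupts, writeback and
 * the GART.
 * Returns 0.
 */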
6920 int si_suspend(struct radeon_device *rdev)
6921 {
6922         radeon_pm_suspend(rdev);
6923         radeon_audio_fini(rdev);
6924         radeon_vm_manager_fini(rdev);
6925         si_cp_enable(rdev, false);
6926         cayman_dma_stop(rdev);
6927         if (rdev->has_uvd) {
6928                 uvd_v1_0_fini(rdev);
6929                 radeon_uvd_suspend(rdev);
6930         }
6931         si_fini_pg(rdev);
6932         si_fini_cg(rdev);
6933         si_irq_suspend(rdev);
6934         radeon_wb_disable(rdev);
6935         si_pcie_gart_disable(rdev);
6936         return 0;
6937 }
6938
6939 /* The plan is to move initialization into this function and use
6940  * helper functions so that radeon_device_init does pretty much
6941  * nothing more than call the asic-specific functions. This
6942  * should also allow removing a bunch of callback functions
6943  * like vram_info.
6944  */
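/**
 * si_init - asic specific driver and hw init (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Reads and posts the BIOS, initializes the clocks, fence driver,
 * memory controller, rings, IH and GART, loads the microcode if it is
 * not already present and then starts the asic via si_startup().
 * Returns 0 on success, error on failure.
 */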
6945 int si_init(struct radeon_device *rdev)
6946 {
6947         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6948         int r;
6949
6950         /* Read BIOS */
6951         if (!radeon_get_bios(rdev)) {
6952                 if (ASIC_IS_AVIVO(rdev))
6953                         return -EINVAL;
6954         }
6955         /* Must be an ATOMBIOS */
6956         if (!rdev->is_atom_bios) {
6957                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6958                 return -EINVAL;
6959         }
6960         r = radeon_atombios_init(rdev);
6961         if (r)
6962                 return r;
6963
6964         /* Post card if necessary */
6965         if (!radeon_card_posted(rdev)) {
6966                 if (!rdev->bios) {
6967                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6968                         return -EINVAL;
6969                 }
6970                 DRM_INFO("GPU not posted. posting now...\n");
6971                 atom_asic_init(rdev->mode_info.atom_context);
6972         }
6973         /* init golden registers */
6974         si_init_golden_registers(rdev);
6975         /* Initialize scratch registers */
6976         si_scratch_init(rdev);
6977         /* Initialize surface registers */
6978         radeon_surface_init(rdev);
6979         /* Initialize clocks */
6980         radeon_get_clock_info(rdev->ddev);
6981
6982         /* Fence driver */
6983         r = radeon_fence_driver_init(rdev);
6984         if (r)
6985                 return r;
6986
6987         /* initialize memory controller */
6988         r = si_mc_init(rdev);
6989         if (r)
6990                 return r;
6991         /* Memory manager */
6992         r = radeon_bo_init(rdev);
6993         if (r)
6994                 return r;
6995
6996         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6997             !rdev->rlc_fw || !rdev->mc_fw) {
6998                 r = si_init_microcode(rdev);
6999                 if (r) {
7000                         DRM_ERROR("Failed to load firmware!\n");
7001                         return r;
7002                 }
7003         }
7004
7005         /* Initialize power management */
7006         radeon_pm_init(rdev);
7007
7008         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7009         ring->ring_obj = NULL;
7010         r600_ring_init(rdev, ring, 1024 * 1024);
7011
7012         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7013         ring->ring_obj = NULL;
7014         r600_ring_init(rdev, ring, 1024 * 1024);
7015
7016         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7017         ring->ring_obj = NULL;
7018         r600_ring_init(rdev, ring, 1024 * 1024);
7019
7020         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7021         ring->ring_obj = NULL;
7022         r600_ring_init(rdev, ring, 64 * 1024);
7023
7024         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7025         ring->ring_obj = NULL;
7026         r600_ring_init(rdev, ring, 64 * 1024);
7027
7028         if (rdev->has_uvd) {
7029                 r = radeon_uvd_init(rdev);
7030                 if (!r) {
7031                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7032                         ring->ring_obj = NULL;
7033                         r600_ring_init(rdev, ring, 4096);
7034                 }
7035         }
7036
7037         rdev->ih.ring_obj = NULL;
7038         r600_ih_ring_init(rdev, 64 * 1024);
7039
7040         r = r600_pcie_gart_init(rdev);
7041         if (r)
7042                 return r;
7043
7044         rdev->accel_working = true;
7045         r = si_startup(rdev);
7046         if (r) {
7047                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7048                 si_cp_fini(rdev);
7049                 cayman_dma_fini(rdev);
7050                 si_irq_fini(rdev);
7051                 sumo_rlc_fini(rdev);
7052                 radeon_wb_fini(rdev);
7053                 radeon_ib_pool_fini(rdev);
7054                 radeon_vm_manager_fini(rdev);
7055                 radeon_irq_kms_fini(rdev);
7056                 si_pcie_gart_fini(rdev);
7057                 rdev->accel_working = false;
7058         }
7059
7060         /* Don't start up if the MC ucode is missing.
7061          * The default clocks and voltages before the MC ucode
7062          * is loaded are not sufficient for advanced operations.
7063          */
7064         if (!rdev->mc_fw) {
7065                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7066                 return -EINVAL;
7067         }
7068
7069         return 0;
7070 }
7071
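/**
 * si_fini - asic specific driver and hw tear down (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the state set up by si_init(): power management, CP and
 * DMA engines, PG/CG, interrupts, RLC, writeback, VM manager, IB pool,
 * UVD, GART, scratch, GEM, fences, BOs and the ATOM BIOS.
 */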
7072 void si_fini(struct radeon_device *rdev)
7073 {
7074         radeon_pm_fini(rdev);
7075         si_cp_fini(rdev);
7076         cayman_dma_fini(rdev);
7077         si_fini_pg(rdev);
7078         si_fini_cg(rdev);
7079         si_irq_fini(rdev);
7080         sumo_rlc_fini(rdev);
7081         radeon_wb_fini(rdev);
7082         radeon_vm_manager_fini(rdev);
7083         radeon_ib_pool_fini(rdev);
7084         radeon_irq_kms_fini(rdev);
7085         if (rdev->has_uvd) {
7086                 uvd_v1_0_fini(rdev);
7087                 radeon_uvd_fini(rdev);
7088         }
7089         si_pcie_gart_fini(rdev);
7090         r600_vram_scratch_fini(rdev);
7091         radeon_gem_fini(rdev);
7092         radeon_fence_driver_fini(rdev);
7093         radeon_bo_fini(rdev);
7094         radeon_atombios_fini(rdev);
7095         kfree(rdev->bios);
7096         rdev->bios = NULL;
7097 }
7098
7099 /**
7100  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7101  *
7102  * @rdev: radeon_device pointer
7103  *
7104  * Fetches a GPU clock counter snapshot (SI).
7105  * Returns the 64 bit clock counter snapshot.
7106  */
7107 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7108 {
7109         uint64_t clock;
7110
7111         mutex_lock(&rdev->gpu_clock_mutex);
7112         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7113         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7114                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7115         mutex_unlock(&rdev->gpu_clock_mutex);
7116         return clock;
7117 }
7118
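/**
 * si_set_uvd_clocks - set the UVD VCLK and DCLK (SI)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK
 * @dclk: requested DCLK
 *
 * Puts the UPLL into bypass, programs the feedback and post dividers
 * for the requested clocks and switches VCLK/DCLK back to the UPLL
 * outputs. If either clock is zero the UPLL is left in bypass and put
 * to sleep.
 * Returns 0 on success, error on failure.
 */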
7119 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7120 {
7121         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7122         int r;
7123
7124         /* bypass vclk and dclk with bclk */
7125         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7126                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7127                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7128
7129         /* put PLL in bypass mode */
7130         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7131
7132         if (!vclk || !dclk) {
7133                 /* keep bypass mode and put the PLL to sleep */
7134                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7135                 return 0;
7136         }
7137
7138         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7139                                           16384, 0x03FFFFFF, 0, 128, 5,
7140                                           &fb_div, &vclk_div, &dclk_div);
7141         if (r)
7142                 return r;
7143
7144         /* set RESET_ANTI_MUX to 0 */
7145         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7146
7147         /* set VCO_MODE to 1 */
7148         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7149
7150         /* toggle UPLL_SLEEP to 1 then back to 0 */
7151         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7152         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7153
7154         /* deassert UPLL_RESET */
7155         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7156
7157         mdelay(1);
7158
7159         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7160         if (r)
7161                 return r;
7162
7163         /* assert UPLL_RESET again */
7164         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7165
7166         /* disable spread spectrum. */
7167         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7168
7169         /* set feedback divider */
7170         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7171
7172         /* set ref divider to 0 */
7173         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7174
7175         if (fb_div < 307200)
7176                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7177         else
7178                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7179
7180         /* set PDIV_A and PDIV_B */
7181         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7182                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7183                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7184
7185         /* give the PLL some time to settle */
7186         mdelay(15);
7187
7188         /* deassert PLL_RESET */
7189         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7190
7191         mdelay(15);
7192
7193         /* switch from bypass mode to normal mode */
7194         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7195
7196         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7197         if (r)
7198                 return r;
7199
7200         /* switch VCLK and DCLK selection */
7201         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7202                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7203                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7204
7205         mdelay(100);
7206
7207         return 0;
7208 }
7209
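/**
 * si_pcie_gen3_enable - enable higher PCIE link speeds (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Checks the link speed capabilities of the GPU and the upstream
 * bridge and, if supported, retrains the link to gen2 or gen3,
 * redoing equalization for gen3 if needed. Can be disabled with
 * the radeon.pcie_gen2 module parameter.
 */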
7210 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7211 {
7212         struct pci_dev *root = rdev->pdev->bus->self;
7213         int bridge_pos, gpu_pos;
7214         u32 speed_cntl, mask, current_data_rate;
7215         int ret, i;
7216         u16 tmp16;
7217
7218         if (pci_is_root_bus(rdev->pdev->bus))
7219                 return;
7220
7221         if (radeon_pcie_gen2 == 0)
7222                 return;
7223
7224         if (rdev->flags & RADEON_IS_IGP)
7225                 return;
7226
7227         if (!(rdev->flags & RADEON_IS_PCIE))
7228                 return;
7229
7230         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7231         if (ret != 0)
7232                 return;
7233
7234         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7235                 return;
7236
7237         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7238         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7239                 LC_CURRENT_DATA_RATE_SHIFT;
7240         if (mask & DRM_PCIE_SPEED_80) {
7241                 if (current_data_rate == 2) {
7242                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7243                         return;
7244                 }
7245                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7246         } else if (mask & DRM_PCIE_SPEED_50) {
7247                 if (current_data_rate == 1) {
7248                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7249                         return;
7250                 }
7251                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7252         }
7253
7254         bridge_pos = pci_pcie_cap(root);
7255         if (!bridge_pos)
7256                 return;
7257
7258         gpu_pos = pci_pcie_cap(rdev->pdev);
7259         if (!gpu_pos)
7260                 return;
7261
7262         if (mask & DRM_PCIE_SPEED_80) {
7263                 /* re-try equalization if gen3 is not already enabled */
7264                 if (current_data_rate != 2) {
7265                         u16 bridge_cfg, gpu_cfg;
7266                         u16 bridge_cfg2, gpu_cfg2;
7267                         u32 max_lw, current_lw, tmp;
7268
7269                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7270                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7271
7272                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7273                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7274
7275                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7276                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7277
7278                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7279                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7280                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7281
7282                         if (current_lw < max_lw) {
7283                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7284                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7285                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7286                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7287                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7288                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7289                                 }
7290                         }
7291
7292                         for (i = 0; i < 10; i++) {
7293                                 /* check status */
7294                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7295                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7296                                         break;
7297
7298                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7299                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7300
7301                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7302                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7303
7304                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7305                                 tmp |= LC_SET_QUIESCE;
7306                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7307
7308                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7309                                 tmp |= LC_REDO_EQ;
7310                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7311
7312                                 mdelay(100);
7313
7314                                 /* linkctl */
7315                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7316                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7317                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7318                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7319
7320                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7321                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7322                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7323                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7324
7325                                 /* linkctl2 */
7326                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7327                                 tmp16 &= ~((1 << 4) | (7 << 9));
7328                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7329                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7330
7331                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7332                                 tmp16 &= ~((1 << 4) | (7 << 9));
7333                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7334                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7335
7336                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7337                                 tmp &= ~LC_SET_QUIESCE;
7338                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7339                         }
7340                 }
7341         }
7342
7343         /* set the link speed */
7344         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7345         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7346         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7347
7348         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7349         tmp16 &= ~0xf;
7350         if (mask & DRM_PCIE_SPEED_80)
7351                 tmp16 |= 3; /* gen3 */
7352         else if (mask & DRM_PCIE_SPEED_50)
7353                 tmp16 |= 2; /* gen2 */
7354         else
7355                 tmp16 |= 1; /* gen1 */
7356         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7357
7358         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7359         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7360         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7361
7362         for (i = 0; i < rdev->usec_timeout; i++) {
7363                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7364                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7365                         break;
7366                 udelay(1);
7367         }
7368 }
7369
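/**
 * si_program_aspm - configure PCIE ASPM (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers and PLL power down behaviour
 * in L1, and when the upstream bridge supports clock power management
 * also selects the clocks needed for CLKREQ# support. Can be disabled
 * with the radeon.aspm module parameter.
 */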
7370 static void si_program_aspm(struct radeon_device *rdev)
7371 {
7372         u32 data, orig;
7373         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7374         bool disable_clkreq = false;
7375
7376         if (radeon_aspm == 0)
7377                 return;
7378
7379         if (!(rdev->flags & RADEON_IS_PCIE))
7380                 return;
7381
7382         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7383         data &= ~LC_XMIT_N_FTS_MASK;
7384         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7385         if (orig != data)
7386                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7387
7388         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7389         data |= LC_GO_TO_RECOVERY;
7390         if (orig != data)
7391                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7392
7393         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7394         data |= P_IGNORE_EDB_ERR;
7395         if (orig != data)
7396                 WREG32_PCIE(PCIE_P_CNTL, data);
7397
7398         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7399         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7400         data |= LC_PMI_TO_L1_DIS;
7401         if (!disable_l0s)
7402                 data |= LC_L0S_INACTIVITY(7);
7403
7404         if (!disable_l1) {
7405                 data |= LC_L1_INACTIVITY(7);
7406                 data &= ~LC_PMI_TO_L1_DIS;
7407                 if (orig != data)
7408                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7409
7410                 if (!disable_plloff_in_l1) {
7411                         bool clk_req_support;
7412
7413                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7414                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7415                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7416                         if (orig != data)
7417                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7418
7419                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7420                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7421                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7422                         if (orig != data)
7423                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7424
7425                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7426                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7427                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7428                         if (orig != data)
7429                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7430
7431                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7432                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7433                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7434                         if (orig != data)
7435                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7436
7437                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7438                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7439                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7440                                 if (orig != data)
7441                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7442
7443                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7444                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7445                                 if (orig != data)
7446                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7447
7448                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7449                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7450                                 if (orig != data)
7451                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7452
7453                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7454                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7455                                 if (orig != data)
7456                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7457
7458                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7459                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7460                                 if (orig != data)
7461                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7462
7463                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7464                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7465                                 if (orig != data)
7466                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7467
7468                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7469                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7470                                 if (orig != data)
7471                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7472
7473                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7474                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7475                                 if (orig != data)
7476                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7477                         }
7478                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7479                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7480                         data |= LC_DYN_LANES_PWR_STATE(3);
7481                         if (orig != data)
7482                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7483
7484                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7485                         data &= ~LS2_EXIT_TIME_MASK;
7486                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7487                                 data |= LS2_EXIT_TIME(5);
7488                         if (orig != data)
7489                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7490
7491                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7492                         data &= ~LS2_EXIT_TIME_MASK;
7493                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7494                                 data |= LS2_EXIT_TIME(5);
7495                         if (orig != data)
7496                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7497
7498                         if (!disable_clkreq &&
7499                             !pci_is_root_bus(rdev->pdev->bus)) {
7500                                 struct pci_dev *root = rdev->pdev->bus->self;
7501                                 u32 lnkcap;
7502
7503                                 clk_req_support = false;
7504                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7505                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7506                                         clk_req_support = true;
7507                         } else {
7508                                 clk_req_support = false;
7509                         }
7510
7511                         if (clk_req_support) {
7512                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7513                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7514                                 if (orig != data)
7515                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7516
7517                                 orig = data = RREG32(THM_CLK_CNTL);
7518                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7519                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7520                                 if (orig != data)
7521                                         WREG32(THM_CLK_CNTL, data);
7522
7523                                 orig = data = RREG32(MISC_CLK_CNTL);
7524                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7525                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7526                                 if (orig != data)
7527                                         WREG32(MISC_CLK_CNTL, data);
7528
7529                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7530                                 data &= ~BCLK_AS_XCLK;
7531                                 if (orig != data)
7532                                         WREG32(CG_CLKPIN_CNTL, data);
7533
7534                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7535                                 data &= ~FORCE_BIF_REFCLK_EN;
7536                                 if (orig != data)
7537                                         WREG32(CG_CLKPIN_CNTL_2, data);
7538
7539                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7540                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7541                                 data |= MPLL_CLKOUT_SEL(4);
7542                                 if (orig != data)
7543                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7544
7545                                 orig = data = RREG32(SPLL_CNTL_MODE);
7546                                 data &= ~SPLL_REFCLK_SEL_MASK;
7547                                 if (orig != data)
7548                                         WREG32(SPLL_CNTL_MODE, data);
7549                         }
7550                 }
7551         } else {
7552                 if (orig != data)
7553                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7554         }
7555
7556         orig = data = RREG32_PCIE(PCIE_CNTL2);
7557         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7558         if (orig != data)
7559                 WREG32_PCIE(PCIE_CNTL2, data);
7560
7561         if (!disable_l0s) {
7562                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7563                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7564                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7565                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7566                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7567                                 data &= ~LC_L0S_INACTIVITY_MASK;
7568                                 if (orig != data)
7569                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7570                         }
7571                 }
7572         }
7573 }