drm/radeon: Store PCI controller in struct radeon_device.hose
[linux-2.6-microblaze.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29
30 #include <drm/drm_vblank.h>
31
32 #include "atom.h"
33 #include "evergreen.h"
34 #include "cik_blit_shaders.h"
35 #include "cik.h"
36 #include "cikd.h"
37 #include "clearstate_ci.h"
38 #include "r600.h"
39 #include "radeon.h"
40 #include "radeon_asic.h"
41 #include "radeon_audio.h"
42 #include "radeon_ucode.h"
43 #include "si.h"
44 #include "vce.h"
45
46 #define SH_MEM_CONFIG_GFX_DEFAULT \
47         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
48
49 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
58
59 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
60 MODULE_FIRMWARE("radeon/bonaire_me.bin");
61 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
62 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
63 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
64 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
65 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
66 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
67 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
68
69 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
70 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
71 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
73 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
74 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
75 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
76 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
77 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
78
79 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
80 MODULE_FIRMWARE("radeon/hawaii_me.bin");
81 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
82 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
83 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
84 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
85 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
86 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
87 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
88
89 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
91 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
92 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
93 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
95
96 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
97 MODULE_FIRMWARE("radeon/kaveri_me.bin");
98 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
99 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
100 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
101 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
102 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
103
104 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
105 MODULE_FIRMWARE("radeon/KABINI_me.bin");
106 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
107 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
108 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
109 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
110
111 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
112 MODULE_FIRMWARE("radeon/kabini_me.bin");
113 MODULE_FIRMWARE("radeon/kabini_ce.bin");
114 MODULE_FIRMWARE("radeon/kabini_mec.bin");
115 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
116 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
117
118 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
119 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
120 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
121 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
122 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
123 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
124
125 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
126 MODULE_FIRMWARE("radeon/mullins_me.bin");
127 MODULE_FIRMWARE("radeon/mullins_ce.bin");
128 MODULE_FIRMWARE("radeon/mullins_mec.bin");
129 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
130 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
131
132 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
133 static void cik_rlc_stop(struct radeon_device *rdev);
134 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135 static void cik_program_aspm(struct radeon_device *rdev);
136 static void cik_init_pg(struct radeon_device *rdev);
137 static void cik_init_cg(struct radeon_device *rdev);
138 static void cik_fini_pg(struct radeon_device *rdev);
139 static void cik_fini_cg(struct radeon_device *rdev);
140 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141                                           bool enable);
142
143 /**
144  * cik_get_allowed_info_register - fetch the register for the info ioctl
145  *
146  * @rdev: radeon_device pointer
147  * @reg: register offset in bytes
148  * @val: register value
149  *
150  * Returns 0 for success or -EINVAL for an invalid register
151  *
152  */
153 int cik_get_allowed_info_register(struct radeon_device *rdev,
154                                   u32 reg, u32 *val)
155 {
156         switch (reg) {
157         case GRBM_STATUS:
158         case GRBM_STATUS2:
159         case GRBM_STATUS_SE0:
160         case GRBM_STATUS_SE1:
161         case GRBM_STATUS_SE2:
162         case GRBM_STATUS_SE3:
163         case SRBM_STATUS:
164         case SRBM_STATUS2:
165         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
166         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
167         case UVD_STATUS:
168         /* TODO VCE */
169                 *val = RREG32(reg);
170                 return 0;
171         default:
172                 return -EINVAL;
173         }
174 }
175
176 /*
177  * Indirect registers accessor
178  */
179 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
180 {
181         unsigned long flags;
182         u32 r;
183
184         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
185         WREG32(CIK_DIDT_IND_INDEX, (reg));
186         r = RREG32(CIK_DIDT_IND_DATA);
187         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
188         return r;
189 }
190
191 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
192 {
193         unsigned long flags;
194
195         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
196         WREG32(CIK_DIDT_IND_INDEX, (reg));
197         WREG32(CIK_DIDT_IND_DATA, (v));
198         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
199 }
200
201 /* get temperature in millidegrees */
202 int ci_get_temp(struct radeon_device *rdev)
203 {
204         u32 temp;
205         int actual_temp = 0;
206
207         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
208                 CTF_TEMP_SHIFT;
209
210         if (temp & 0x200)
211                 actual_temp = 255;
212         else
213                 actual_temp = temp & 0x1ff;
214
215         return actual_temp * 1000;
216 }
217
218 /* get temperature in millidegrees */
219 int kv_get_temp(struct radeon_device *rdev)
220 {
221         u32 temp;
222         int actual_temp = 0;
223
224         temp = RREG32_SMC(0xC0300E0C);
225
226         if (temp)
227                 actual_temp = (temp / 8) - 49;
228         else
229                 actual_temp = 0;
230
231         return actual_temp * 1000;
232 }
233
234 /*
235  * Indirect registers accessor
236  */
237 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
238 {
239         unsigned long flags;
240         u32 r;
241
242         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
243         WREG32(PCIE_INDEX, reg);
244         (void)RREG32(PCIE_INDEX);
245         r = RREG32(PCIE_DATA);
246         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
247         return r;
248 }
249
250 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
251 {
252         unsigned long flags;
253
254         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
255         WREG32(PCIE_INDEX, reg);
256         (void)RREG32(PCIE_INDEX);
257         WREG32(PCIE_DATA, v);
258         (void)RREG32(PCIE_DATA);
259         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
260 }
261
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX parts.
 *
 * Observed layout: the table is mostly pairs of
 *   ((instance/broadcast select << 16) | (register dword offset)),
 *   0x00000000
 * i.e. a packed register descriptor followed by a value slot.  A bare
 * small constant (0x3 below, and 0x5 near the end) precedes a run of
 * descriptors and appears to mark a grouped/indexed section; the final
 * 0x5 group has no interleaved value slots.
 *
 * NOTE(review): the exact grouping/encoding semantics are interpreted
 * by the RLC setup code and RLC microcode, not visible here -- confirm
 * against the RLC programming docs before editing entries.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* group marker -- see NOTE(review) above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* group marker; note: no value slots in this group */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
708
709 static const u32 kalindi_rlc_save_restore_register_list[] =
710 {
711         (0x0e00 << 16) | (0xc12c >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0xc140 >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0xc150 >> 2),
716         0x00000000,
717         (0x0e00 << 16) | (0xc15c >> 2),
718         0x00000000,
719         (0x0e00 << 16) | (0xc168 >> 2),
720         0x00000000,
721         (0x0e00 << 16) | (0xc170 >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0xc204 >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc2b4 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0xc2b8 >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0xc2bc >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0xc2c0 >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0x8228 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0x829c >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0x869c >> 2),
738         0x00000000,
739         (0x0600 << 16) | (0x98f4 >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0x98f8 >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0x9900 >> 2),
744         0x00000000,
745         (0x0e00 << 16) | (0xc260 >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0x90e8 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0x3c000 >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x3c00c >> 2),
752         0x00000000,
753         (0x0e00 << 16) | (0x8c1c >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0x9700 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0xcd20 >> 2),
758         0x00000000,
759         (0x4e00 << 16) | (0xcd20 >> 2),
760         0x00000000,
761         (0x5e00 << 16) | (0xcd20 >> 2),
762         0x00000000,
763         (0x6e00 << 16) | (0xcd20 >> 2),
764         0x00000000,
765         (0x7e00 << 16) | (0xcd20 >> 2),
766         0x00000000,
767         (0x0e00 << 16) | (0x89bc >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0x8900 >> 2),
770         0x00000000,
771         0x3,
772         (0x0e00 << 16) | (0xc130 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0xc134 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0xc1fc >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0xc208 >> 2),
779         0x00000000,
780         (0x0e00 << 16) | (0xc264 >> 2),
781         0x00000000,
782         (0x0e00 << 16) | (0xc268 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0xc26c >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0xc270 >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0xc274 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0xc28c >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0xc290 >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0xc294 >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0xc298 >> 2),
797         0x00000000,
798         (0x0e00 << 16) | (0xc2a0 >> 2),
799         0x00000000,
800         (0x0e00 << 16) | (0xc2a4 >> 2),
801         0x00000000,
802         (0x0e00 << 16) | (0xc2a8 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc2ac >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0x301d0 >> 2),
807         0x00000000,
808         (0x0e00 << 16) | (0x30238 >> 2),
809         0x00000000,
810         (0x0e00 << 16) | (0x30250 >> 2),
811         0x00000000,
812         (0x0e00 << 16) | (0x30254 >> 2),
813         0x00000000,
814         (0x0e00 << 16) | (0x30258 >> 2),
815         0x00000000,
816         (0x0e00 << 16) | (0x3025c >> 2),
817         0x00000000,
818         (0x4e00 << 16) | (0xc900 >> 2),
819         0x00000000,
820         (0x5e00 << 16) | (0xc900 >> 2),
821         0x00000000,
822         (0x6e00 << 16) | (0xc900 >> 2),
823         0x00000000,
824         (0x7e00 << 16) | (0xc900 >> 2),
825         0x00000000,
826         (0x4e00 << 16) | (0xc904 >> 2),
827         0x00000000,
828         (0x5e00 << 16) | (0xc904 >> 2),
829         0x00000000,
830         (0x6e00 << 16) | (0xc904 >> 2),
831         0x00000000,
832         (0x7e00 << 16) | (0xc904 >> 2),
833         0x00000000,
834         (0x4e00 << 16) | (0xc908 >> 2),
835         0x00000000,
836         (0x5e00 << 16) | (0xc908 >> 2),
837         0x00000000,
838         (0x6e00 << 16) | (0xc908 >> 2),
839         0x00000000,
840         (0x7e00 << 16) | (0xc908 >> 2),
841         0x00000000,
842         (0x4e00 << 16) | (0xc90c >> 2),
843         0x00000000,
844         (0x5e00 << 16) | (0xc90c >> 2),
845         0x00000000,
846         (0x6e00 << 16) | (0xc90c >> 2),
847         0x00000000,
848         (0x7e00 << 16) | (0xc90c >> 2),
849         0x00000000,
850         (0x4e00 << 16) | (0xc910 >> 2),
851         0x00000000,
852         (0x5e00 << 16) | (0xc910 >> 2),
853         0x00000000,
854         (0x6e00 << 16) | (0xc910 >> 2),
855         0x00000000,
856         (0x7e00 << 16) | (0xc910 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0xc99c >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x9834 >> 2),
861         0x00000000,
862         (0x0000 << 16) | (0x30f00 >> 2),
863         0x00000000,
864         (0x0000 << 16) | (0x30f04 >> 2),
865         0x00000000,
866         (0x0000 << 16) | (0x30f08 >> 2),
867         0x00000000,
868         (0x0000 << 16) | (0x30f0c >> 2),
869         0x00000000,
870         (0x0600 << 16) | (0x9b7c >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x8a14 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0x8a18 >> 2),
875         0x00000000,
876         (0x0600 << 16) | (0x30a00 >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0x8bf0 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0x8bcc >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0x8b24 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0x30a04 >> 2),
885         0x00000000,
886         (0x0600 << 16) | (0x30a10 >> 2),
887         0x00000000,
888         (0x0600 << 16) | (0x30a14 >> 2),
889         0x00000000,
890         (0x0600 << 16) | (0x30a18 >> 2),
891         0x00000000,
892         (0x0600 << 16) | (0x30a2c >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0xc700 >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0xc704 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0xc708 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0xc768 >> 2),
901         0x00000000,
902         (0x0400 << 16) | (0xc770 >> 2),
903         0x00000000,
904         (0x0400 << 16) | (0xc774 >> 2),
905         0x00000000,
906         (0x0400 << 16) | (0xc798 >> 2),
907         0x00000000,
908         (0x0400 << 16) | (0xc79c >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x9100 >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0x3c010 >> 2),
913         0x00000000,
914         (0x0e00 << 16) | (0x8c00 >> 2),
915         0x00000000,
916         (0x0e00 << 16) | (0x8c04 >> 2),
917         0x00000000,
918         (0x0e00 << 16) | (0x8c20 >> 2),
919         0x00000000,
920         (0x0e00 << 16) | (0x8c38 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0x8c3c >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xae00 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x9604 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0xac08 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0xac0c >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0xac10 >> 2),
933         0x00000000,
934         (0x0e00 << 16) | (0xac14 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0xac58 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0xac68 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0xac6c >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0xac70 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0xac74 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0xac78 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0xac7c >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0xac80 >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0xac84 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0xac88 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0xac8c >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0x970c >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x9714 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x9718 >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x971c >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x31068 >> 2),
967         0x00000000,
968         (0x4e00 << 16) | (0x31068 >> 2),
969         0x00000000,
970         (0x5e00 << 16) | (0x31068 >> 2),
971         0x00000000,
972         (0x6e00 << 16) | (0x31068 >> 2),
973         0x00000000,
974         (0x7e00 << 16) | (0x31068 >> 2),
975         0x00000000,
976         (0x0e00 << 16) | (0xcd10 >> 2),
977         0x00000000,
978         (0x0e00 << 16) | (0xcd14 >> 2),
979         0x00000000,
980         (0x0e00 << 16) | (0x88b0 >> 2),
981         0x00000000,
982         (0x0e00 << 16) | (0x88b4 >> 2),
983         0x00000000,
984         (0x0e00 << 16) | (0x88b8 >> 2),
985         0x00000000,
986         (0x0e00 << 16) | (0x88bc >> 2),
987         0x00000000,
988         (0x0400 << 16) | (0x89c0 >> 2),
989         0x00000000,
990         (0x0e00 << 16) | (0x88c4 >> 2),
991         0x00000000,
992         (0x0e00 << 16) | (0x88c8 >> 2),
993         0x00000000,
994         (0x0e00 << 16) | (0x88d0 >> 2),
995         0x00000000,
996         (0x0e00 << 16) | (0x88d4 >> 2),
997         0x00000000,
998         (0x0e00 << 16) | (0x88d8 >> 2),
999         0x00000000,
1000         (0x0e00 << 16) | (0x8980 >> 2),
1001         0x00000000,
1002         (0x0e00 << 16) | (0x30938 >> 2),
1003         0x00000000,
1004         (0x0e00 << 16) | (0x3093c >> 2),
1005         0x00000000,
1006         (0x0e00 << 16) | (0x30940 >> 2),
1007         0x00000000,
1008         (0x0e00 << 16) | (0x89a0 >> 2),
1009         0x00000000,
1010         (0x0e00 << 16) | (0x30900 >> 2),
1011         0x00000000,
1012         (0x0e00 << 16) | (0x30904 >> 2),
1013         0x00000000,
1014         (0x0e00 << 16) | (0x89b4 >> 2),
1015         0x00000000,
1016         (0x0e00 << 16) | (0x3e1fc >> 2),
1017         0x00000000,
1018         (0x0e00 << 16) | (0x3c210 >> 2),
1019         0x00000000,
1020         (0x0e00 << 16) | (0x3c214 >> 2),
1021         0x00000000,
1022         (0x0e00 << 16) | (0x3c218 >> 2),
1023         0x00000000,
1024         (0x0e00 << 16) | (0x8904 >> 2),
1025         0x00000000,
1026         0x5,
1027         (0x0e00 << 16) | (0x8c28 >> 2),
1028         (0x0e00 << 16) | (0x8c2c >> 2),
1029         (0x0e00 << 16) | (0x8c30 >> 2),
1030         (0x0e00 << 16) | (0x8c34 >> 2),
1031         (0x0e00 << 16) | (0x9600 >> 2),
1032 };
1033
/* Bonaire golden SPM register settings.  Entries look like
 * {reg, mask, value} triples — interpretation is defined by
 * radeon_program_register_sequence(); confirm there. */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1038
/* Bonaire golden common register settings ({reg, mask, value} triples,
 * presumably — applied via radeon_program_register_sequence()). */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1046
/* Bonaire golden register settings ({reg, mask, value} triples, presumably
 * — applied via radeon_program_register_sequence()). */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1091
/* Bonaire MGCG/CGCG (clock-gating) init sequence ({reg, mask, value}
 * triples, presumably — applied via radeon_program_register_sequence()). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1177
/* Spectre (Kaveri) golden SPM register settings ({reg, mask, value}
 * triples, presumably — applied via radeon_program_register_sequence()). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1182
/* Spectre (Kaveri) golden common register settings ({reg, mask, value}
 * triples, presumably — applied via radeon_program_register_sequence()). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1190
/* Spectre (Kaveri) golden register settings ({reg, mask, value} triples,
 * presumably — applied via radeon_program_register_sequence()). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1219
/* Spectre (Kaveri) MGCG/CGCG (clock-gating) init sequence ({reg, mask,
 * value} triples, presumably — applied via
 * radeon_program_register_sequence()). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1310
/* Kalindi (Kabini) golden SPM register settings ({reg, mask, value}
 * triples, presumably — applied via radeon_program_register_sequence()). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1315
/* Kalindi (Kabini) golden common register settings ({reg, mask, value}
 * triples, presumably — applied via radeon_program_register_sequence()). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1323
/* Kalindi (Kabini) golden register settings ({reg, mask, value} triples,
 * presumably — applied via radeon_program_register_sequence()). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1357
/* Kalindi (Kabini) MGCG/CGCG (clock-gating) init sequence ({reg, mask,
 * value} triples, presumably — applied via
 * radeon_program_register_sequence()). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1416
/* Hawaii golden SPM register settings ({reg, mask, value} triples,
 * presumably — applied via radeon_program_register_sequence()). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1421
/* Hawaii golden common register settings ({reg, mask, value} triples,
 * presumably — applied via radeon_program_register_sequence()). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1430
/* Hawaii golden register settings ({reg, mask, value} triples, presumably
 * — applied via radeon_program_register_sequence()). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1470
/* Hawaii MGCG/CGCG (clock-gating) init sequence ({reg, mask, value}
 * triples, presumably — applied via radeon_program_register_sequence()). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1581
/* Godavari (Mullins) golden register settings ({reg, mask, value} triples,
 * presumably — applied via radeon_program_register_sequence()). */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned and looks like a typo
	 * for 0x9834 (cf. the same {0xf00fffff, 0x00000400} entry in
	 * kalindi_golden_registers) — confirm against the CIK register
	 * spec before changing a hardware write. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1617
1618
1619 static void cik_init_golden_registers(struct radeon_device *rdev)
1620 {
1621         switch (rdev->family) {
1622         case CHIP_BONAIRE:
1623                 radeon_program_register_sequence(rdev,
1624                                                  bonaire_mgcg_cgcg_init,
1625                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1626                 radeon_program_register_sequence(rdev,
1627                                                  bonaire_golden_registers,
1628                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1629                 radeon_program_register_sequence(rdev,
1630                                                  bonaire_golden_common_registers,
1631                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1632                 radeon_program_register_sequence(rdev,
1633                                                  bonaire_golden_spm_registers,
1634                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1635                 break;
1636         case CHIP_KABINI:
1637                 radeon_program_register_sequence(rdev,
1638                                                  kalindi_mgcg_cgcg_init,
1639                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1640                 radeon_program_register_sequence(rdev,
1641                                                  kalindi_golden_registers,
1642                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1643                 radeon_program_register_sequence(rdev,
1644                                                  kalindi_golden_common_registers,
1645                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1646                 radeon_program_register_sequence(rdev,
1647                                                  kalindi_golden_spm_registers,
1648                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1649                 break;
1650         case CHIP_MULLINS:
1651                 radeon_program_register_sequence(rdev,
1652                                                  kalindi_mgcg_cgcg_init,
1653                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654                 radeon_program_register_sequence(rdev,
1655                                                  godavari_golden_registers,
1656                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1657                 radeon_program_register_sequence(rdev,
1658                                                  kalindi_golden_common_registers,
1659                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660                 radeon_program_register_sequence(rdev,
1661                                                  kalindi_golden_spm_registers,
1662                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663                 break;
1664         case CHIP_KAVERI:
1665                 radeon_program_register_sequence(rdev,
1666                                                  spectre_mgcg_cgcg_init,
1667                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1668                 radeon_program_register_sequence(rdev,
1669                                                  spectre_golden_registers,
1670                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1671                 radeon_program_register_sequence(rdev,
1672                                                  spectre_golden_common_registers,
1673                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1674                 radeon_program_register_sequence(rdev,
1675                                                  spectre_golden_spm_registers,
1676                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1677                 break;
1678         case CHIP_HAWAII:
1679                 radeon_program_register_sequence(rdev,
1680                                                  hawaii_mgcg_cgcg_init,
1681                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1682                 radeon_program_register_sequence(rdev,
1683                                                  hawaii_golden_registers,
1684                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1685                 radeon_program_register_sequence(rdev,
1686                                                  hawaii_golden_common_registers,
1687                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1688                 radeon_program_register_sequence(rdev,
1689                                                  hawaii_golden_spm_registers,
1690                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1691                 break;
1692         default:
1693                 break;
1694         }
1695 }
1696
1697 /**
1698  * cik_get_xclk - get the xclk
1699  *
1700  * @rdev: radeon_device pointer
1701  *
1702  * Returns the reference clock used by the gfx engine
1703  * (CIK).
1704  */
1705 u32 cik_get_xclk(struct radeon_device *rdev)
1706 {
1707         u32 reference_clock = rdev->clock.spll.reference_freq;
1708
1709         if (rdev->flags & RADEON_IS_IGP) {
1710                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1711                         return reference_clock / 2;
1712         } else {
1713                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1714                         return reference_clock / 4;
1715         }
1716         return reference_clock;
1717 }
1718
1719 /**
1720  * cik_mm_rdoorbell - read a doorbell dword
1721  *
1722  * @rdev: radeon_device pointer
1723  * @index: doorbell index
1724  *
1725  * Returns the value in the doorbell aperture at the
1726  * requested doorbell index (CIK).
1727  */
1728 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729 {
1730         if (index < rdev->doorbell.num_doorbells) {
1731                 return readl(rdev->doorbell.ptr + index);
1732         } else {
1733                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734                 return 0;
1735         }
1736 }
1737
1738 /**
1739  * cik_mm_wdoorbell - write a doorbell dword
1740  *
1741  * @rdev: radeon_device pointer
1742  * @index: doorbell index
1743  * @v: value to write
1744  *
1745  * Writes @v to the doorbell aperture at the
1746  * requested doorbell index (CIK).
1747  */
1748 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749 {
1750         if (index < rdev->doorbell.num_doorbells) {
1751                 writel(v, rdev->doorbell.ptr + index);
1752         } else {
1753                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754         }
1755 }
1756
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC_SEQ_IO_DEBUG {index, data} pairs programmed by
 * ci_mc_load_microcode() before loading the legacy (headerless)
 * Bonaire MC firmware image.  Values are hw-specific magic.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1798
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC_SEQ_IO_DEBUG {index, data} pairs programmed by
 * ci_mc_load_microcode() before loading the legacy (headerless)
 * Hawaii MC firmware image.  Values are hw-specific magic.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1826
1827
1828 /**
1829  * cik_srbm_select - select specific register instances
1830  *
1831  * @rdev: radeon_device pointer
1832  * @me: selected ME (micro engine)
1833  * @pipe: pipe
1834  * @queue: queue
1835  * @vmid: VMID
1836  *
1837  * Switches the currently active registers instances.  Some
1838  * registers are instanced per VMID, others are instanced per
1839  * me/pipe/queue combination.
1840  */
1841 static void cik_srbm_select(struct radeon_device *rdev,
1842                             u32 me, u32 pipe, u32 queue, u32 vmid)
1843 {
1844         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845                              MEID(me & 0x3) |
1846                              VMID(vmid & 0xf) |
1847                              QUEUEID(queue & 0x7));
1848         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849 }
1850
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 *
 * Supports both firmware layouts: validated images (rdev->new_fw)
 * carry a header describing little-endian io-debug and ucode
 * sections; legacy images are raw big-endian dwords paired with
 * the static per-asic io-debug tables above.  The ucode is only
 * programmed if the MC sequencer is not already running.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* legacy image data (big-endian) */
	const __le32 *new_fw_data = NULL;	/* validated image data (little-endian) */
	u32 running, tmp;
	u32 *io_mc_regs = NULL;			/* legacy {index, data} table */
	const __le32 *new_io_mc_regs = NULL;	/* validated {index, data} stream */
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* sizes/offsets come from the embedded firmware header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io-debug section is a stream of {index, data} dword pairs */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy image: whole file is the ucode payload */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* skip the load entirely if the sequencer is already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* NOTE(review): extra io-debug overrides for PCI device
		 * 0x6649 with a 0x56xx MC_SEQ_MISC0 signature -
		 * presumably a board/memory-vendor quirk; values are
		 * hw-specific magic
		 */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels; a
		 * timeout here is not treated as fatal
		 */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1956
1957 /**
1958  * cik_init_microcode - load ucode images from disk
1959  *
1960  * @rdev: radeon_device pointer
1961  *
1962  * Use the firmware interface to load the ucode images into
1963  * the driver (not loaded into hw).
1964  * Returns 0 on success, error on failure.
1965  */
1966 static int cik_init_microcode(struct radeon_device *rdev)
1967 {
1968         const char *chip_name;
1969         const char *new_chip_name;
1970         size_t pfp_req_size, me_req_size, ce_req_size,
1971                 mec_req_size, rlc_req_size, mc_req_size = 0,
1972                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973         char fw_name[30];
1974         int new_fw = 0;
1975         int err;
1976         int num_fw;
1977         bool new_smc = false;
1978
1979         DRM_DEBUG("\n");
1980
1981         switch (rdev->family) {
1982         case CHIP_BONAIRE:
1983                 chip_name = "BONAIRE";
1984                 if ((rdev->pdev->revision == 0x80) ||
1985                     (rdev->pdev->revision == 0x81) ||
1986                     (rdev->pdev->device == 0x665f))
1987                         new_smc = true;
1988                 new_chip_name = "bonaire";
1989                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1991                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998                 num_fw = 8;
1999                 break;
2000         case CHIP_HAWAII:
2001                 chip_name = "HAWAII";
2002                 if (rdev->pdev->revision == 0x80)
2003                         new_smc = true;
2004                 new_chip_name = "hawaii";
2005                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2007                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014                 num_fw = 8;
2015                 break;
2016         case CHIP_KAVERI:
2017                 chip_name = "KAVERI";
2018                 new_chip_name = "kaveri";
2019                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2021                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025                 num_fw = 7;
2026                 break;
2027         case CHIP_KABINI:
2028                 chip_name = "KABINI";
2029                 new_chip_name = "kabini";
2030                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2032                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036                 num_fw = 6;
2037                 break;
2038         case CHIP_MULLINS:
2039                 chip_name = "MULLINS";
2040                 new_chip_name = "mullins";
2041                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2043                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047                 num_fw = 6;
2048                 break;
2049         default: BUG();
2050         }
2051
2052         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053
2054         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056         if (err) {
2057                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059                 if (err)
2060                         goto out;
2061                 if (rdev->pfp_fw->size != pfp_req_size) {
2062                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063                                rdev->pfp_fw->size, fw_name);
2064                         err = -EINVAL;
2065                         goto out;
2066                 }
2067         } else {
2068                 err = radeon_ucode_validate(rdev->pfp_fw);
2069                 if (err) {
2070                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071                                fw_name);
2072                         goto out;
2073                 } else {
2074                         new_fw++;
2075                 }
2076         }
2077
2078         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080         if (err) {
2081                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083                 if (err)
2084                         goto out;
2085                 if (rdev->me_fw->size != me_req_size) {
2086                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087                                rdev->me_fw->size, fw_name);
2088                         err = -EINVAL;
2089                 }
2090         } else {
2091                 err = radeon_ucode_validate(rdev->me_fw);
2092                 if (err) {
2093                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094                                fw_name);
2095                         goto out;
2096                 } else {
2097                         new_fw++;
2098                 }
2099         }
2100
2101         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103         if (err) {
2104                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106                 if (err)
2107                         goto out;
2108                 if (rdev->ce_fw->size != ce_req_size) {
2109                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110                                rdev->ce_fw->size, fw_name);
2111                         err = -EINVAL;
2112                 }
2113         } else {
2114                 err = radeon_ucode_validate(rdev->ce_fw);
2115                 if (err) {
2116                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117                                fw_name);
2118                         goto out;
2119                 } else {
2120                         new_fw++;
2121                 }
2122         }
2123
2124         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126         if (err) {
2127                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129                 if (err)
2130                         goto out;
2131                 if (rdev->mec_fw->size != mec_req_size) {
2132                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133                                rdev->mec_fw->size, fw_name);
2134                         err = -EINVAL;
2135                 }
2136         } else {
2137                 err = radeon_ucode_validate(rdev->mec_fw);
2138                 if (err) {
2139                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140                                fw_name);
2141                         goto out;
2142                 } else {
2143                         new_fw++;
2144                 }
2145         }
2146
2147         if (rdev->family == CHIP_KAVERI) {
2148                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150                 if (err) {
2151                         goto out;
2152                 } else {
2153                         err = radeon_ucode_validate(rdev->mec2_fw);
2154                         if (err) {
2155                                 goto out;
2156                         } else {
2157                                 new_fw++;
2158                         }
2159                 }
2160         }
2161
2162         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164         if (err) {
2165                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167                 if (err)
2168                         goto out;
2169                 if (rdev->rlc_fw->size != rlc_req_size) {
2170                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171                                rdev->rlc_fw->size, fw_name);
2172                         err = -EINVAL;
2173                 }
2174         } else {
2175                 err = radeon_ucode_validate(rdev->rlc_fw);
2176                 if (err) {
2177                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178                                fw_name);
2179                         goto out;
2180                 } else {
2181                         new_fw++;
2182                 }
2183         }
2184
2185         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187         if (err) {
2188                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190                 if (err)
2191                         goto out;
2192                 if (rdev->sdma_fw->size != sdma_req_size) {
2193                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194                                rdev->sdma_fw->size, fw_name);
2195                         err = -EINVAL;
2196                 }
2197         } else {
2198                 err = radeon_ucode_validate(rdev->sdma_fw);
2199                 if (err) {
2200                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201                                fw_name);
2202                         goto out;
2203                 } else {
2204                         new_fw++;
2205                 }
2206         }
2207
2208         /* No SMC, MC ucode on APUs */
2209         if (!(rdev->flags & RADEON_IS_IGP)) {
2210                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212                 if (err) {
2213                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215                         if (err) {
2216                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218                                 if (err)
2219                                         goto out;
2220                         }
2221                         if ((rdev->mc_fw->size != mc_req_size) &&
2222                             (rdev->mc_fw->size != mc2_req_size)){
2223                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224                                        rdev->mc_fw->size, fw_name);
2225                                 err = -EINVAL;
2226                         }
2227                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228                 } else {
2229                         err = radeon_ucode_validate(rdev->mc_fw);
2230                         if (err) {
2231                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232                                        fw_name);
2233                                 goto out;
2234                         } else {
2235                                 new_fw++;
2236                         }
2237                 }
2238
2239                 if (new_smc)
2240                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241                 else
2242                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244                 if (err) {
2245                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247                         if (err) {
2248                                 pr_err("smc: error loading firmware \"%s\"\n",
2249                                        fw_name);
2250                                 release_firmware(rdev->smc_fw);
2251                                 rdev->smc_fw = NULL;
2252                                 err = 0;
2253                         } else if (rdev->smc_fw->size != smc_req_size) {
2254                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255                                        rdev->smc_fw->size, fw_name);
2256                                 err = -EINVAL;
2257                         }
2258                 } else {
2259                         err = radeon_ucode_validate(rdev->smc_fw);
2260                         if (err) {
2261                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262                                        fw_name);
2263                                 goto out;
2264                         } else {
2265                                 new_fw++;
2266                         }
2267                 }
2268         }
2269
2270         if (new_fw == 0) {
2271                 rdev->new_fw = false;
2272         } else if (new_fw < num_fw) {
2273                 pr_err("ci_fw: mixing new and old firmware!\n");
2274                 err = -EINVAL;
2275         } else {
2276                 rdev->new_fw = true;
2277         }
2278
2279 out:
2280         if (err) {
2281                 if (err != -EINVAL)
2282                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283                                fw_name);
2284                 release_firmware(rdev->pfp_fw);
2285                 rdev->pfp_fw = NULL;
2286                 release_firmware(rdev->me_fw);
2287                 rdev->me_fw = NULL;
2288                 release_firmware(rdev->ce_fw);
2289                 rdev->ce_fw = NULL;
2290                 release_firmware(rdev->mec_fw);
2291                 rdev->mec_fw = NULL;
2292                 release_firmware(rdev->mec2_fw);
2293                 rdev->mec2_fw = NULL;
2294                 release_firmware(rdev->rlc_fw);
2295                 rdev->rlc_fw = NULL;
2296                 release_firmware(rdev->sdma_fw);
2297                 rdev->sdma_fw = NULL;
2298                 release_firmware(rdev->mc_fw);
2299                 rdev->mc_fw = NULL;
2300                 release_firmware(rdev->smc_fw);
2301                 rdev->smc_fw = NULL;
2302         }
2303         return err;
2304 }
2305
2306 /*
2307  * Core functions
2308  */
2309 /**
2310  * cik_tiling_mode_table_init - init the hw tiling table
2311  *
2312  * @rdev: radeon_device pointer
2313  *
2314  * Starting with SI, the tiling setup is done globally in a
2315  * set of 32 tiling modes.  Rather than selecting each set of
2316  * parameters per surface as on older asics, we just select
2317  * which index in the tiling table we want to use, and the
2318  * surface uses those parameters (CIK).
2319  */
2320 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321 {
2322         u32 *tile = rdev->config.cik.tile_mode_array;
2323         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324         const u32 num_tile_mode_states =
2325                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326         const u32 num_secondary_tile_mode_states =
2327                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328         u32 reg_offset, split_equal_to_row_size;
2329         u32 num_pipe_configs;
2330         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331                 rdev->config.cik.max_shader_engines;
2332
2333         switch (rdev->config.cik.mem_row_size_in_kb) {
2334         case 1:
2335                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336                 break;
2337         case 2:
2338         default:
2339                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340                 break;
2341         case 4:
2342                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343                 break;
2344         }
2345
2346         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347         if (num_pipe_configs > 8)
2348                 num_pipe_configs = 16;
2349
2350         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351                 tile[reg_offset] = 0;
2352         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353                 macrotile[reg_offset] = 0;
2354
2355         switch(num_pipe_configs) {
2356         case 16:
2357                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                            TILE_SPLIT(split_equal_to_row_size));
2377                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                            TILE_SPLIT(split_equal_to_row_size));
2388                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435
2436                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439                            NUM_BANKS(ADDR_SURF_16_BANK));
2440                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443                            NUM_BANKS(ADDR_SURF_16_BANK));
2444                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447                            NUM_BANKS(ADDR_SURF_16_BANK));
2448                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451                            NUM_BANKS(ADDR_SURF_16_BANK));
2452                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455                            NUM_BANKS(ADDR_SURF_8_BANK));
2456                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                            NUM_BANKS(ADDR_SURF_4_BANK));
2460                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                            NUM_BANKS(ADDR_SURF_2_BANK));
2464                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                            NUM_BANKS(ADDR_SURF_16_BANK));
2468                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                            NUM_BANKS(ADDR_SURF_16_BANK));
2472                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                             NUM_BANKS(ADDR_SURF_16_BANK));
2476                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                             NUM_BANKS(ADDR_SURF_8_BANK));
2480                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                             NUM_BANKS(ADDR_SURF_4_BANK));
2484                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                             NUM_BANKS(ADDR_SURF_2_BANK));
2488                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                             NUM_BANKS(ADDR_SURF_2_BANK));
2492
2493                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497                 break;
2498
2499         case 8:
2500                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                            TILE_SPLIT(split_equal_to_row_size));
2520                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                            TILE_SPLIT(split_equal_to_row_size));
2531                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578
2579                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582                                 NUM_BANKS(ADDR_SURF_16_BANK));
2583                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586                                 NUM_BANKS(ADDR_SURF_16_BANK));
2587                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590                                 NUM_BANKS(ADDR_SURF_16_BANK));
2591                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594                                 NUM_BANKS(ADDR_SURF_16_BANK));
2595                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598                                 NUM_BANKS(ADDR_SURF_8_BANK));
2599                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602                                 NUM_BANKS(ADDR_SURF_4_BANK));
2603                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606                                 NUM_BANKS(ADDR_SURF_2_BANK));
2607                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                 NUM_BANKS(ADDR_SURF_8_BANK));
2627                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                 NUM_BANKS(ADDR_SURF_4_BANK));
2631                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                 NUM_BANKS(ADDR_SURF_2_BANK));
2635
2636                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640                 break;
2641
2642         case 4:
2643                 if (num_rbs == 4) {
2644                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                            TILE_SPLIT(split_equal_to_row_size));
2664                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                            TILE_SPLIT(split_equal_to_row_size));
2675                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722
2723                 } else if (num_rbs < 4) {
2724                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743                            TILE_SPLIT(split_equal_to_row_size));
2744                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754                            TILE_SPLIT(split_equal_to_row_size));
2755                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802                 }
2803
2804                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807                                 NUM_BANKS(ADDR_SURF_16_BANK));
2808                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819                                 NUM_BANKS(ADDR_SURF_16_BANK));
2820                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823                                 NUM_BANKS(ADDR_SURF_16_BANK));
2824                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827                                 NUM_BANKS(ADDR_SURF_8_BANK));
2828                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831                                 NUM_BANKS(ADDR_SURF_4_BANK));
2832                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859                                 NUM_BANKS(ADDR_SURF_4_BANK));
2860
2861                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865                 break;
2866
2867         case 2:
2868                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870                            PIPE_CONFIG(ADDR_SURF_P2) |
2871                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874                            PIPE_CONFIG(ADDR_SURF_P2) |
2875                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878                            PIPE_CONFIG(ADDR_SURF_P2) |
2879                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882                            PIPE_CONFIG(ADDR_SURF_P2) |
2883                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886                            PIPE_CONFIG(ADDR_SURF_P2) |
2887                            TILE_SPLIT(split_equal_to_row_size));
2888                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889                            PIPE_CONFIG(ADDR_SURF_P2) |
2890                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893                            PIPE_CONFIG(ADDR_SURF_P2) |
2894                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897                            PIPE_CONFIG(ADDR_SURF_P2) |
2898                            TILE_SPLIT(split_equal_to_row_size));
2899                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900                            PIPE_CONFIG(ADDR_SURF_P2);
2901                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903                            PIPE_CONFIG(ADDR_SURF_P2));
2904                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906                             PIPE_CONFIG(ADDR_SURF_P2) |
2907                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910                             PIPE_CONFIG(ADDR_SURF_P2) |
2911                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914                             PIPE_CONFIG(ADDR_SURF_P2) |
2915                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                             PIPE_CONFIG(ADDR_SURF_P2) |
2918                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921                             PIPE_CONFIG(ADDR_SURF_P2) |
2922                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925                             PIPE_CONFIG(ADDR_SURF_P2) |
2926                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929                             PIPE_CONFIG(ADDR_SURF_P2) |
2930                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933                             PIPE_CONFIG(ADDR_SURF_P2));
2934                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936                             PIPE_CONFIG(ADDR_SURF_P2) |
2937                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940                             PIPE_CONFIG(ADDR_SURF_P2) |
2941                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944                             PIPE_CONFIG(ADDR_SURF_P2) |
2945                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946
2947                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950                                 NUM_BANKS(ADDR_SURF_16_BANK));
2951                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954                                 NUM_BANKS(ADDR_SURF_16_BANK));
2955                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958                                 NUM_BANKS(ADDR_SURF_16_BANK));
2959                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962                                 NUM_BANKS(ADDR_SURF_16_BANK));
2963                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                 NUM_BANKS(ADDR_SURF_16_BANK));
2967                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974                                 NUM_BANKS(ADDR_SURF_8_BANK));
2975                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                 NUM_BANKS(ADDR_SURF_8_BANK));
3003
3004                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008                 break;
3009
3010         default:
3011                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012         }
3013 }
3014
3015 /**
3016  * cik_select_se_sh - select which SE, SH to address
3017  *
3018  * @rdev: radeon_device pointer
3019  * @se_num: shader engine to address
3020  * @sh_num: sh block to address
3021  *
3022  * Select which SE, SH combinations to address. Certain
3023  * registers are instanced per SE or SH.  0xffffffff means
3024  * broadcast to all SEs or SHs (CIK).
3025  */
3026 static void cik_select_se_sh(struct radeon_device *rdev,
3027                              u32 se_num, u32 sh_num)
3028 {
3029         u32 data = INSTANCE_BROADCAST_WRITES;
3030
3031         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033         else if (se_num == 0xffffffff)
3034                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035         else if (sh_num == 0xffffffff)
3036                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037         else
3038                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039         WREG32(GRBM_GFX_INDEX, data);
3040 }
3041
3042 /**
3043  * cik_create_bitmask - create a bitmask
3044  *
3045  * @bit_width: length of the mask
3046  *
3047  * create a variable length bit mask (CIK).
3048  * Returns the bitmask.
3049  */
3050 static u32 cik_create_bitmask(u32 bit_width)
3051 {
3052         u32 i, mask = 0;
3053
3054         for (i = 0; i < bit_width; i++) {
3055                 mask <<= 1;
3056                 mask |= 1;
3057         }
3058         return mask;
3059 }
3060
3061 /**
3062  * cik_get_rb_disabled - computes the mask of disabled RBs
3063  *
3064  * @rdev: radeon_device pointer
3065  * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066  * @sh_per_se: number of SH blocks per SE for the asic
3067  *
3068  * Calculates the bitmask of disabled RBs (CIK).
3069  * Returns the disabled RB bitmask.
3070  */
3071 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072                               u32 max_rb_num_per_se,
3073                               u32 sh_per_se)
3074 {
3075         u32 data, mask;
3076
3077         data = RREG32(CC_RB_BACKEND_DISABLE);
3078         if (data & 1)
3079                 data &= BACKEND_DISABLE_MASK;
3080         else
3081                 data = 0;
3082         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083
3084         data >>= BACKEND_DISABLE_SHIFT;
3085
3086         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087
3088         return data & mask;
3089 }
3090
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).  Builds a global bitmap
 * of disabled RBs, caches the resulting enabled-RB mask in
 * rdev->config.cik.backend_enable_mask, and programs
 * PA_SC_RASTER_CONFIG for each SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num_per_se)
{
        int i, j;
        u32 data, mask;
        u32 disabled_rbs = 0;
        u32 enabled_rbs = 0;

        /* Walk every SE/SH pair and pack each one's disabled-RB bits
         * into one global bitmap; Hawaii uses a wider per-SH bitmap
         * field than the other CIK parts. */
        for (i = 0; i < se_num; i++) {
                for (j = 0; j < sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
                        if (rdev->family == CHIP_HAWAII)
                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
                        else
                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
                }
        }
        /* restore broadcast so later writes reach all SEs/SHs */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        /* invert the disabled bitmap into an enabled bitmap */
        mask = 1;
        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
                if (!(disabled_rbs & mask))
                        enabled_rbs |= mask;
                mask <<= 1;
        }

        /* cached for later consumers — NOTE(review): exact consumers
         * are not visible in this file chunk */
        rdev->config.cik.backend_enable_mask = enabled_rbs;

        /* program each SE's raster config from its enabled-RB bits;
         * two bits of enabled_rbs are consumed per SH iteration */
        for (i = 0; i < se_num; i++) {
                cik_select_se_sh(rdev, i, 0xffffffff);
                data = 0;
                for (j = 0; j < sh_per_se; j++) {
                        switch (enabled_rbs & 3) {
                        case 0:
                                if (j == 0)
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
                                else
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
                                break;
                        case 1:
                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                                break;
                        case 2:
                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                                break;
                        case 3:
                        default:
                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                                break;
                        }
                        enabled_rbs >>= 2;
                }
                WREG32(PA_SC_RASTER_CONFIG, data);
        }
        /* back to broadcast addressing */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3159
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
        u32 mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /* per-family shader/tiling limits plus a golden GB_ADDR_CONFIG */
        switch (rdev->family) {
        case CHIP_BONAIRE:
                rdev->config.cik.max_shader_engines = 2;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 7;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_HAWAII:
                rdev->config.cik.max_shader_engines = 4;
                rdev->config.cik.max_tile_pipes = 16;
                rdev->config.cik.max_cu_per_sh = 11;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 4;
                rdev->config.cik.max_texture_channel_caches = 16;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 8;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KABINI:
        case CHIP_MULLINS:
        default:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 2;
                rdev->config.cik.max_cu_per_sh = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 2;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
        /* NOTE(review): 0x1 writes presumably enable + ack the SRBM
         * interrupt — confirm against register docs */
        WREG32(SRBM_INT_CNTL, 0x1);
        WREG32(SRBM_INT_ACK, 0x1);

        /* enable framebuffer reads/writes through the BIF */
        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        /* NOTE(review): result discarded — read kept presumably for
         * its side effect; confirm before removing */
        RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
        rdev->config.cik.mem_max_burst_length_bytes = 256;
        /* derive DRAM row size (in kb) from the column count, capped at 4 */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cik.mem_row_size_in_kb > 4)
                rdev->config.cik.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cik.shader_engine_tile_size = 32;
        rdev->config.cik.num_gpus = 1;
        rdev->config.cik.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cik.tile_config = 0;
        switch (rdev->config.cik.num_tile_pipes) {
        case 1:
                rdev->config.cik.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cik.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cik.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.cik.tile_config |= (3 << 0);
                break;
        }
        rdev->config.cik.tile_config |=
                ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* broadcast the address config to every block that consumes it;
         * the two SDMA writes hit SDMA instance 0 and 1 via their
         * per-instance register offsets */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

        cik_tiling_mode_table_init(rdev);

        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);

        /* count active CUs across all SE/SH pairs */
        rdev->config.cik.active_cus = 0;
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        rdev->config.cik.active_cus +=
                                hweight32(cik_get_cu_active_bitmap(rdev, i, j));
                }
        }

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        WREG32(SX_DEBUG_1, 0x20);

        WREG32(TA_CNTL_AUX, 0x00010000);

        tmp = RREG32(SPI_CONFIG_CNTL);
        tmp |= 0x03000000;
        WREG32(SPI_CONFIG_CNTL, tmp);

        /* NOTE(review): SQ_CONFIG is set to 1 here and cleared to 0
         * further down — presumably intentional sequencing; confirm */
        WREG32(SQ_CONFIG, 1);

        WREG32(DB_DEBUG, 0);

        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
        tmp |= 0x00000400;
        WREG32(DB_DEBUG2, tmp);

        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
        tmp |= 0x00020200;
        WREG32(DB_DEBUG3, tmp);

        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
        tmp |= 0x00018208;
        WREG32(CB_HW_CONTROL, tmp);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        /* FIFO sizes come from the per-family config chosen above */
        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* NOTE(review): written back unchanged — presumably to latch
         * the current value; confirm before simplifying */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

        /* let the register writes settle before continuing */
        udelay(50);
}
3408
/*
 * GPU scratch register helper functions.
 */
3412 /**
3413  * cik_scratch_init - setup driver info for CP scratch regs
3414  *
3415  * @rdev: radeon_device pointer
3416  *
3417  * Set up the number and offset of the CP scratch registers.
3418  * NOTE: use of CP scratch registers is a legacy inferface and
3419  * is not used by default on newer asics (r6xx+).  On newer asics,
3420  * memory buffers are used for fences rather than scratch regs.
3421  */
3422 static void cik_scratch_init(struct radeon_device *rdev)
3423 {
3424         int i;
3425
3426         rdev->scratch.num_reg = 7;
3427         rdev->scratch.reg_base = SCRATCH_REG0;
3428         for (i = 0; i < rdev->scratch.num_reg; i++) {
3429                 rdev->scratch.free[i] = true;
3430                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431         }
3432 }
3433
3434 /**
3435  * cik_ring_test - basic gfx ring test
3436  *
3437  * @rdev: radeon_device pointer
3438  * @ring: radeon_ring structure holding ring information
3439  *
3440  * Allocate a scratch register and write to it using the gfx ring (CIK).
3441  * Provides a basic gfx ring test to verify that the ring is working.
3442  * Used by cik_cp_gfx_resume();
3443  * Returns 0 on success, error on failure.
3444  */
3445 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446 {
3447         uint32_t scratch;
3448         uint32_t tmp = 0;
3449         unsigned i;
3450         int r;
3451
3452         r = radeon_scratch_get(rdev, &scratch);
3453         if (r) {
3454                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455                 return r;
3456         }
3457         WREG32(scratch, 0xCAFEDEAD);
3458         r = radeon_ring_lock(rdev, ring, 3);
3459         if (r) {
3460                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461                 radeon_scratch_free(rdev, scratch);
3462                 return r;
3463         }
3464         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466         radeon_ring_write(ring, 0xDEADBEEF);
3467         radeon_ring_unlock_commit(rdev, ring, false);
3468
3469         for (i = 0; i < rdev->usec_timeout; i++) {
3470                 tmp = RREG32(scratch);
3471                 if (tmp == 0xDEADBEEF)
3472                         break;
3473                 udelay(1);
3474         }
3475         if (i < rdev->usec_timeout) {
3476                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477         } else {
3478                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479                           ring->idx, scratch, tmp);
3480                 r = -EINVAL;
3481         }
3482         radeon_scratch_free(rdev, scratch);
3483         return r;
3484 }
3485
3486 /**
3487  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488  *
3489  * @rdev: radeon_device pointer
3490  * @ridx: radeon ring index
3491  *
3492  * Emits an hdp flush on the cp.
3493  */
3494 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495                                        int ridx)
3496 {
3497         struct radeon_ring *ring = &rdev->ring[ridx];
3498         u32 ref_and_mask;
3499
3500         switch (ring->idx) {
3501         case CAYMAN_RING_TYPE_CP1_INDEX:
3502         case CAYMAN_RING_TYPE_CP2_INDEX:
3503         default:
3504                 switch (ring->me) {
3505                 case 0:
3506                         ref_and_mask = CP2 << ring->pipe;
3507                         break;
3508                 case 1:
3509                         ref_and_mask = CP6 << ring->pipe;
3510                         break;
3511                 default:
3512                         return;
3513                 }
3514                 break;
3515         case RADEON_RING_TYPE_GFX_INDEX:
3516                 ref_and_mask = CP0;
3517                 break;
3518         }
3519
3520         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526         radeon_ring_write(ring, ref_and_mask);
3527         radeon_ring_write(ring, ref_and_mask);
3528         radeon_ring_write(ring, 0x20); /* poll interval */
3529 }
3530
3531 /**
3532  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533  *
3534  * @rdev: radeon_device pointer
3535  * @fence: radeon fence object
3536  *
3537  * Emits a fence sequnce number on the gfx ring and flushes
3538  * GPU caches.
3539  */
3540 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541                              struct radeon_fence *fence)
3542 {
3543         struct radeon_ring *ring = &rdev->ring[fence->ring];
3544         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545
3546         /* Workaround for cache flush problems. First send a dummy EOP
3547          * event down the pipe with seq one below.
3548          */
3549         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551                                  EOP_TC_ACTION_EN |
3552                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553                                  EVENT_INDEX(5)));
3554         radeon_ring_write(ring, addr & 0xfffffffc);
3555         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556                                 DATA_SEL(1) | INT_SEL(0));
3557         radeon_ring_write(ring, fence->seq - 1);
3558         radeon_ring_write(ring, 0);
3559
3560         /* Then send the real EOP event down the pipe. */
3561         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563                                  EOP_TC_ACTION_EN |
3564                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565                                  EVENT_INDEX(5)));
3566         radeon_ring_write(ring, addr & 0xfffffffc);
3567         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568         radeon_ring_write(ring, fence->seq);
3569         radeon_ring_write(ring, 0);
3570 }
3571
3572 /**
3573  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574  *
3575  * @rdev: radeon_device pointer
3576  * @fence: radeon fence object
3577  *
3578  * Emits a fence sequnce number on the compute ring and flushes
3579  * GPU caches.
3580  */
3581 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582                                  struct radeon_fence *fence)
3583 {
3584         struct radeon_ring *ring = &rdev->ring[fence->ring];
3585         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586
3587         /* RELEASE_MEM - flush caches, send int */
3588         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590                                  EOP_TC_ACTION_EN |
3591                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592                                  EVENT_INDEX(5)));
3593         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594         radeon_ring_write(ring, addr & 0xfffffffc);
3595         radeon_ring_write(ring, upper_32_bits(addr));
3596         radeon_ring_write(ring, fence->seq);
3597         radeon_ring_write(ring, 0);
3598 }
3599
3600 /**
3601  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602  *
3603  * @rdev: radeon_device pointer
3604  * @ring: radeon ring buffer object
3605  * @semaphore: radeon semaphore object
3606  * @emit_wait: Is this a sempahore wait?
3607  *
3608  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609  * from running ahead of semaphore waits.
3610  */
3611 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612                              struct radeon_ring *ring,
3613                              struct radeon_semaphore *semaphore,
3614                              bool emit_wait)
3615 {
3616         uint64_t addr = semaphore->gpu_addr;
3617         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618
3619         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620         radeon_ring_write(ring, lower_32_bits(addr));
3621         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622
3623         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624                 /* Prevent the PFP from running ahead of the semaphore wait */
3625                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626                 radeon_ring_write(ring, 0x0);
3627         }
3628
3629         return true;
3630 }
3631
3632 /**
3633  * cik_copy_cpdma - copy pages using the CP DMA engine
3634  *
3635  * @rdev: radeon_device pointer
3636  * @src_offset: src GPU address
3637  * @dst_offset: dst GPU address
3638  * @num_gpu_pages: number of GPU pages to xfer
3639  * @resv: reservation object to sync to
3640  *
3641  * Copy GPU paging using the CP DMA engine (CIK+).
3642  * Used by the radeon ttm implementation to move pages if
3643  * registered as the asic copy callback.
3644  */
3645 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646                                     uint64_t src_offset, uint64_t dst_offset,
3647                                     unsigned num_gpu_pages,
3648                                     struct dma_resv *resv)
3649 {
3650         struct radeon_fence *fence;
3651         struct radeon_sync sync;
3652         int ring_index = rdev->asic->copy.blit_ring_index;
3653         struct radeon_ring *ring = &rdev->ring[ring_index];
3654         u32 size_in_bytes, cur_size_in_bytes, control;
3655         int i, num_loops;
3656         int r = 0;
3657
3658         radeon_sync_create(&sync);
3659
3660         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663         if (r) {
3664                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3665                 radeon_sync_free(rdev, &sync, NULL);
3666                 return ERR_PTR(r);
3667         }
3668
3669         radeon_sync_resv(rdev, &sync, resv, false);
3670         radeon_sync_rings(rdev, &sync, ring->idx);
3671
3672         for (i = 0; i < num_loops; i++) {
3673                 cur_size_in_bytes = size_in_bytes;
3674                 if (cur_size_in_bytes > 0x1fffff)
3675                         cur_size_in_bytes = 0x1fffff;
3676                 size_in_bytes -= cur_size_in_bytes;
3677                 control = 0;
3678                 if (size_in_bytes == 0)
3679                         control |= PACKET3_DMA_DATA_CP_SYNC;
3680                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681                 radeon_ring_write(ring, control);
3682                 radeon_ring_write(ring, lower_32_bits(src_offset));
3683                 radeon_ring_write(ring, upper_32_bits(src_offset));
3684                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3685                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3686                 radeon_ring_write(ring, cur_size_in_bytes);
3687                 src_offset += cur_size_in_bytes;
3688                 dst_offset += cur_size_in_bytes;
3689         }
3690
3691         r = radeon_fence_emit(rdev, &fence, ring->idx);
3692         if (r) {
3693                 radeon_ring_unlock_undo(rdev, ring);
3694                 radeon_sync_free(rdev, &sync, NULL);
3695                 return ERR_PTR(r);
3696         }
3697
3698         radeon_ring_unlock_commit(rdev, ring, false);
3699         radeon_sync_free(rdev, &sync, fence);
3700
3701         return fence;
3702 }
3703
3704 /*
3705  * IB stuff
3706  */
3707 /**
3708  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709  *
3710  * @rdev: radeon_device pointer
3711  * @ib: radeon indirect buffer object
3712  *
3713  * Emits a DE (drawing engine) or CE (constant engine) IB
3714  * on the gfx ring.  IBs are usually generated by userspace
3715  * acceleration drivers and submitted to the kernel for
3716  * scheduling on the ring.  This function schedules the IB
3717  * on the gfx ring for execution by the GPU.
3718  */
3719 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720 {
3721         struct radeon_ring *ring = &rdev->ring[ib->ring];
3722         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723         u32 header, control = INDIRECT_BUFFER_VALID;
3724
3725         if (ib->is_const_ib) {
3726                 /* set switch buffer packet before const IB */
3727                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728                 radeon_ring_write(ring, 0);
3729
3730                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731         } else {
3732                 u32 next_rptr;
3733                 if (ring->rptr_save_reg) {
3734                         next_rptr = ring->wptr + 3 + 4;
3735                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3737                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3738                         radeon_ring_write(ring, next_rptr);
3739                 } else if (rdev->wb.enabled) {
3740                         next_rptr = ring->wptr + 5 + 4;
3741                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745                         radeon_ring_write(ring, next_rptr);
3746                 }
3747
3748                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749         }
3750
3751         control |= ib->length_dw | (vm_id << 24);
3752
3753         radeon_ring_write(ring, header);
3754         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756         radeon_ring_write(ring, control);
3757 }
3758
3759 /**
3760  * cik_ib_test - basic gfx ring IB test
3761  *
3762  * @rdev: radeon_device pointer
3763  * @ring: radeon_ring structure holding ring information
3764  *
3765  * Allocate an IB and execute it on the gfx ring (CIK).
3766  * Provides a basic gfx ring test to verify that IBs are working.
3767  * Returns 0 on success, error on failure.
3768  */
3769 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770 {
3771         struct radeon_ib ib;
3772         uint32_t scratch;
3773         uint32_t tmp = 0;
3774         unsigned i;
3775         int r;
3776
3777         r = radeon_scratch_get(rdev, &scratch);
3778         if (r) {
3779                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780                 return r;
3781         }
3782         WREG32(scratch, 0xCAFEDEAD);
3783         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784         if (r) {
3785                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786                 radeon_scratch_free(rdev, scratch);
3787                 return r;
3788         }
3789         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791         ib.ptr[2] = 0xDEADBEEF;
3792         ib.length_dw = 3;
3793         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794         if (r) {
3795                 radeon_scratch_free(rdev, scratch);
3796                 radeon_ib_free(rdev, &ib);
3797                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798                 return r;
3799         }
3800         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801                 RADEON_USEC_IB_TEST_TIMEOUT));
3802         if (r < 0) {
3803                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804                 radeon_scratch_free(rdev, scratch);
3805                 radeon_ib_free(rdev, &ib);
3806                 return r;
3807         } else if (r == 0) {
3808                 DRM_ERROR("radeon: fence wait timed out.\n");
3809                 radeon_scratch_free(rdev, scratch);
3810                 radeon_ib_free(rdev, &ib);
3811                 return -ETIMEDOUT;
3812         }
3813         r = 0;
3814         for (i = 0; i < rdev->usec_timeout; i++) {
3815                 tmp = RREG32(scratch);
3816                 if (tmp == 0xDEADBEEF)
3817                         break;
3818                 udelay(1);
3819         }
3820         if (i < rdev->usec_timeout) {
3821                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822         } else {
3823                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824                           scratch, tmp);
3825                 r = -EINVAL;
3826         }
3827         radeon_scratch_free(rdev, scratch);
3828         radeon_ib_free(rdev, &ib);
3829         return r;
3830 }
3831
3832 /*
3833  * CP.
 * On CIK, gfx and compute now have independent command processors.
3835  *
3836  * GFX
3837  * Gfx consists of a single ring and can process both gfx jobs and
3838  * compute jobs.  The gfx CP consists of three microengines (ME):
3839  * PFP - Pre-Fetch Parser
3840  * ME - Micro Engine
3841  * CE - Constant Engine
3842  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3844  * used by the DE so that they can be loaded into cache in parallel
3845  * while the DE is processing state update packets.
3846  *
3847  * Compute
3848  * The compute CP consists of two microengines (ME):
3849  * MEC1 - Compute MicroEngine 1
3850  * MEC2 - Compute MicroEngine 2
3851  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3852  * The queues are exposed to userspace and are programmed directly
3853  * by the compute runtime.
3854  */
3855 /**
3856  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857  *
3858  * @rdev: radeon_device pointer
3859  * @enable: enable or disable the MEs
3860  *
3861  * Halts or unhalts the gfx MEs.
3862  */
3863 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864 {
3865         if (enable)
3866                 WREG32(CP_ME_CNTL, 0);
3867         else {
3868                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872         }
3873         udelay(50);
3874 }
3875
3876 /**
3877  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878  *
3879  * @rdev: radeon_device pointer
3880  *
3881  * Loads the gfx PFP, ME, and CE ucode.
3882  * Returns 0 for success, -EINVAL if the ucode is not available.
3883  */
3884 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885 {
3886         int i;
3887
3888         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889                 return -EINVAL;
3890
3891         cik_cp_gfx_enable(rdev, false);
3892
3893         if (rdev->new_fw) {
3894                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3897                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898                 const struct gfx_firmware_header_v1_0 *me_hdr =
3899                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900                 const __le32 *fw_data;
3901                 u32 fw_size;
3902
3903                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906
3907                 /* PFP */
3908                 fw_data = (const __le32 *)
3909                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911                 WREG32(CP_PFP_UCODE_ADDR, 0);
3912                 for (i = 0; i < fw_size; i++)
3913                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915
3916                 /* CE */
3917                 fw_data = (const __le32 *)
3918                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920                 WREG32(CP_CE_UCODE_ADDR, 0);
3921                 for (i = 0; i < fw_size; i++)
3922                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924
3925                 /* ME */
3926                 fw_data = (const __be32 *)
3927                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929                 WREG32(CP_ME_RAM_WADDR, 0);
3930                 for (i = 0; i < fw_size; i++)
3931                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934         } else {
3935                 const __be32 *fw_data;
3936
3937                 /* PFP */
3938                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3939                 WREG32(CP_PFP_UCODE_ADDR, 0);
3940                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942                 WREG32(CP_PFP_UCODE_ADDR, 0);
3943
3944                 /* CE */
3945                 fw_data = (const __be32 *)rdev->ce_fw->data;
3946                 WREG32(CP_CE_UCODE_ADDR, 0);
3947                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949                 WREG32(CP_CE_UCODE_ADDR, 0);
3950
3951                 /* ME */
3952                 fw_data = (const __be32 *)rdev->me_fw->data;
3953                 WREG32(CP_ME_RAM_WADDR, 0);
3954                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956                 WREG32(CP_ME_RAM_WADDR, 0);
3957         }
3958
3959         return 0;
3960 }
3961
3962 /**
3963  * cik_cp_gfx_start - start the gfx ring
3964  *
3965  * @rdev: radeon_device pointer
3966  *
3967  * Enables the ring and loads the clear state context and other
3968  * packets required to init the ring.
3969  * Returns 0 for success, error for failure.
3970  */
3971 static int cik_cp_gfx_start(struct radeon_device *rdev)
3972 {
3973         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974         int r, i;
3975
3976         /* init the CP */
3977         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978         WREG32(CP_ENDIAN_SWAP, 0);
3979         WREG32(CP_DEVICE_ID, 1);
3980
3981         cik_cp_gfx_enable(rdev, true);
3982
3983         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984         if (r) {
3985                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986                 return r;
3987         }
3988
3989         /* init the CE partitions.  CE only used for gfx on CIK */
3990         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992         radeon_ring_write(ring, 0x8000);
3993         radeon_ring_write(ring, 0x8000);
3994
3995         /* setup clear context state */
3996         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000         radeon_ring_write(ring, 0x80000000);
4001         radeon_ring_write(ring, 0x80000000);
4002
4003         for (i = 0; i < cik_default_size; i++)
4004                 radeon_ring_write(ring, cik_default_state[i]);
4005
4006         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008
4009         /* set clear context state */
4010         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011         radeon_ring_write(ring, 0);
4012
4013         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014         radeon_ring_write(ring, 0x00000316);
4015         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017
4018         radeon_ring_unlock_commit(rdev, ring, false);
4019
4020         return 0;
4021 }
4022
4023 /**
4024  * cik_cp_gfx_fini - stop the gfx ring
4025  *
4026  * @rdev: radeon_device pointer
4027  *
4028  * Stop the gfx ring and tear down the driver ring
4029  * info.
4030  */
4031 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032 {
4033         cik_cp_gfx_enable(rdev, false);
4034         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035 }
4036
4037 /**
4038  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039  *
4040  * @rdev: radeon_device pointer
4041  *
4042  * Program the location and size of the gfx ring buffer
4043  * and test it to make sure it's working.
4044  * Returns 0 for success, error for failure.
4045  */
4046 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047 {
4048         struct radeon_ring *ring;
4049         u32 tmp;
4050         u32 rb_bufsz;
4051         u64 rb_addr;
4052         int r;
4053
4054         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055         if (rdev->family != CHIP_HAWAII)
4056                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057
4058         /* Set the write pointer delay */
4059         WREG32(CP_RB_WPTR_DELAY, 0);
4060
4061         /* set the RB to use vmid 0 */
4062         WREG32(CP_RB_VMID, 0);
4063
4064         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065
4066         /* ring 0 - compute and gfx */
4067         /* Set ring buffer size */
4068         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069         rb_bufsz = order_base_2(ring->ring_size / 8);
4070         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071 #ifdef __BIG_ENDIAN
4072         tmp |= BUF_SWAP_32BIT;
4073 #endif
4074         WREG32(CP_RB0_CNTL, tmp);
4075
4076         /* Initialize the ring buffer's read and write pointers */
4077         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078         ring->wptr = 0;
4079         WREG32(CP_RB0_WPTR, ring->wptr);
4080
4081         /* set the wb address wether it's enabled or not */
4082         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084
4085         /* scratch register shadowing is no longer supported */
4086         WREG32(SCRATCH_UMSK, 0);
4087
4088         if (!rdev->wb.enabled)
4089                 tmp |= RB_NO_UPDATE;
4090
4091         mdelay(1);
4092         WREG32(CP_RB0_CNTL, tmp);
4093
4094         rb_addr = ring->gpu_addr >> 8;
4095         WREG32(CP_RB0_BASE, rb_addr);
4096         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097
4098         /* start the ring */
4099         cik_cp_gfx_start(rdev);
4100         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102         if (r) {
4103                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104                 return r;
4105         }
4106
4107         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109
4110         return 0;
4111 }
4112
4113 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114                      struct radeon_ring *ring)
4115 {
4116         u32 rptr;
4117
4118         if (rdev->wb.enabled)
4119                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4120         else
4121                 rptr = RREG32(CP_RB0_RPTR);
4122
4123         return rptr;
4124 }
4125
4126 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127                      struct radeon_ring *ring)
4128 {
4129         return RREG32(CP_RB0_WPTR);
4130 }
4131
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read the register back to flush the write (posting read) */
	(void)RREG32(CP_RB0_WPTR);
}
4138
4139 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140                          struct radeon_ring *ring)
4141 {
4142         u32 rptr;
4143
4144         if (rdev->wb.enabled) {
4145                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4146         } else {
4147                 mutex_lock(&rdev->srbm_mutex);
4148                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149                 rptr = RREG32(CP_HQD_PQ_RPTR);
4150                 cik_srbm_select(rdev, 0, 0, 0, 0);
4151                 mutex_unlock(&rdev->srbm_mutex);
4152         }
4153
4154         return rptr;
4155 }
4156
4157 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158                          struct radeon_ring *ring)
4159 {
4160         u32 wptr;
4161
4162         if (rdev->wb.enabled) {
4163                 /* XXX check if swapping is necessary on BE */
4164                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4165         } else {
4166                 mutex_lock(&rdev->srbm_mutex);
4167                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168                 wptr = RREG32(CP_HQD_PQ_WPTR);
4169                 cik_srbm_select(rdev, 0, 0, 0, 0);
4170                 mutex_unlock(&rdev->srbm_mutex);
4171         }
4172
4173         return wptr;
4174 }
4175
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	/* notify the CP of the new wptr via this ring's doorbell */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4183
/* Quiesce a compute ring's hardware queue (HQD).
 * NOTE: the caller is expected to hold rdev->srbm_mutex
 * (see cik_cp_compute_enable).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and wait (up to usec_timeout) for the
		 * queue to go idle, then clear the request and the pointers
		 */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4208
4209 /**
4210  * cik_cp_compute_enable - enable/disable the compute CP MEs
4211  *
4212  * @rdev: radeon_device pointer
4213  * @enable: enable or disable the MEs
4214  *
4215  * Halts or unhalts the compute MEs.
4216  */
4217 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218 {
4219         if (enable)
4220                 WREG32(CP_MEC_CNTL, 0);
4221         else {
4222                 /*
4223                  * To make hibernation reliable we need to clear compute ring
4224                  * configuration before halting the compute ring.
4225                  */
4226                 mutex_lock(&rdev->srbm_mutex);
4227                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229                 mutex_unlock(&rdev->srbm_mutex);
4230
4231                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234         }
4235         udelay(50);
4236 }
4237
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
        int i;

        if (!rdev->mec_fw)
                return -EINVAL;

        /* the MECs must be halted while their ucode is (re)written */
        cik_cp_compute_enable(rdev, false);

        if (rdev->new_fw) {
                /* new-style firmware: little-endian payload located via a
                 * parsed gfx header */
                const struct gfx_firmware_header_v1_0 *mec_hdr =
                        (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                const __le32 *fw_data;
                u32 fw_size;

                radeon_ucode_print_gfx_hdr(&mec_hdr->header);

                /* MEC1: reset the ucode write address, stream in the words,
                 * then leave the version in the address register */
                fw_data = (const __le32 *)
                        (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
                for (i = 0; i < fw_size; i++)
                        WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

                /* MEC2 (only KV has a second MEC, with its own fw image) */
                if (rdev->family == CHIP_KAVERI) {
                        const struct gfx_firmware_header_v1_0 *mec2_hdr =
                                (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

                        fw_data = (const __le32 *)
                                (rdev->mec2_fw->data +
                                 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
                        fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                        for (i = 0; i < fw_size; i++)
                                WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
                        WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
                }
        } else {
                /* old-style firmware: headerless big-endian blob of
                 * CIK_MEC_UCODE_SIZE dwords; on KV the same image is loaded
                 * into both MECs */
                const __be32 *fw_data;

                /* MEC1 */
                fw_data = (const __be32 *)rdev->mec_fw->data;
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
                for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                        WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

                if (rdev->family == CHIP_KAVERI) {
                        /* MEC2 */
                        fw_data = (const __be32 *)rdev->mec_fw->data;
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                        for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                                WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                }
        }

        return 0;
}
4308
4309 /**
4310  * cik_cp_compute_start - start the compute queues
4311  *
4312  * @rdev: radeon_device pointer
4313  *
4314  * Enable the compute queues.
4315  * Returns 0 for success, error for failure.
4316  */
4317 static int cik_cp_compute_start(struct radeon_device *rdev)
4318 {
4319         cik_cp_compute_enable(rdev, true);
4320
4321         return 0;
4322 }
4323
4324 /**
4325  * cik_cp_compute_fini - stop the compute queues
4326  *
4327  * @rdev: radeon_device pointer
4328  *
4329  * Stop the compute queues and tear down the driver queue
4330  * info.
4331  */
4332 static void cik_cp_compute_fini(struct radeon_device *rdev)
4333 {
4334         int i, idx, r;
4335
4336         cik_cp_compute_enable(rdev, false);
4337
4338         for (i = 0; i < 2; i++) {
4339                 if (i == 0)
4340                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341                 else
4342                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343
4344                 if (rdev->ring[idx].mqd_obj) {
4345                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346                         if (unlikely(r != 0))
4347                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348
4349                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351
4352                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353                         rdev->ring[idx].mqd_obj = NULL;
4354                 }
4355         }
4356 }
4357
4358 static void cik_mec_fini(struct radeon_device *rdev)
4359 {
4360         int r;
4361
4362         if (rdev->mec.hpd_eop_obj) {
4363                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364                 if (unlikely(r != 0))
4365                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368
4369                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370                 rdev->mec.hpd_eop_obj = NULL;
4371         }
4372 }
4373
4374 #define MEC_HPD_SIZE 2048
4375
4376 static int cik_mec_init(struct radeon_device *rdev)
4377 {
4378         int r;
4379         u32 *hpd;
4380
4381         /*
4382          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384          */
4385         if (rdev->family == CHIP_KAVERI)
4386                 rdev->mec.num_mec = 2;
4387         else
4388                 rdev->mec.num_mec = 1;
4389         rdev->mec.num_pipe = 4;
4390         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391
4392         if (rdev->mec.hpd_eop_obj == NULL) {
4393                 r = radeon_bo_create(rdev,
4394                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395                                      PAGE_SIZE, true,
4396                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397                                      &rdev->mec.hpd_eop_obj);
4398                 if (r) {
4399                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4400                         return r;
4401                 }
4402         }
4403
4404         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405         if (unlikely(r != 0)) {
4406                 cik_mec_fini(rdev);
4407                 return r;
4408         }
4409         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410                           &rdev->mec.hpd_eop_gpu_addr);
4411         if (r) {
4412                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4413                 cik_mec_fini(rdev);
4414                 return r;
4415         }
4416         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417         if (r) {
4418                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4419                 cik_mec_fini(rdev);
4420                 return r;
4421         }
4422
4423         /* clear memory.  Not sure if this is required or not */
4424         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425
4426         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428
4429         return 0;
4430 }
4431
/* Shadow copy of the per-queue CP_HQD_* / CP_MQD_* register state.
 *
 * Embedded in struct bonaire_mqd, whose GPU address is programmed into
 * CP_MQD_BASE_ADDR in cik_cp_compute_resume() — i.e. this memory is
 * visible to the CP, so the field order/size must not be changed.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4470
/* Memory Queue Descriptor (MQD) for a CIK compute queue.
 *
 * One of these lives in a GTT buffer object per compute ring
 * (rdev->ring[idx].mqd_obj); its GPU address is handed to the CP via
 * CP_MQD_BASE_ADDR in cik_cp_compute_resume(), so the layout appears to
 * be consumed directly by CP microcode — do not reorder or resize
 * fields.
 */
struct bonaire_mqd
{
        u32 header;                     /* set to 0xC0310800 at init */
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];    /* CU masks; all-ones at init */
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];    /* CU masks; all-ones at init */
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        struct hqd_registers queue_state;  /* shadow of the HQD registers */
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4498
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);

        /* pipes 0-3 belong to MEC1 (me=1), 4-7 to MEC2 (me=2, KV only) */
        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
                int me = (i < 4) ? 1 : 2;
                int pipe = (i < 4) ? i : (i - 4);

                cik_srbm_select(rdev, me, pipe, 0, 0);

                /* each pipe owns one MEC_HPD_SIZE * 2 slice of the EOP
                 * buffer allocated in cik_mec_init() */
                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
                /* write the EOP addr */
                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

                /* set the VMID assigned */
                WREG32(CP_HPD_EOP_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
                tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);

        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                /* lazily allocate the MQD bo; freed in cik_cp_compute_fini() */
                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                /* enable all compute units for this queue */
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active (same drain sequence as
                 * cik_compute_stop()) */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr= 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                /* queue size is log2(size/8); rptr block size likewise */
                mqd->queue_state.cp_hqd_pq_control |=
                        order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);

                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                /* mark the ring usable, then verify it with a ring test */
                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}
4744
/* Enable/disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}
4750
/* Load the gfx and compute CP microcode.  Returns 0 on success or the
 * first error encountered. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int r;

        r = cik_cp_gfx_load_microcode(rdev);
        if (r)
                return r;

        return cik_cp_compute_load_microcode(rdev);
}
4764
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4770
4771 static int cik_cp_resume(struct radeon_device *rdev)
4772 {
4773         int r;
4774
4775         cik_enable_gui_idle_interrupt(rdev, false);
4776
4777         r = cik_cp_load_microcode(rdev);
4778         if (r)
4779                 return r;
4780
4781         r = cik_cp_gfx_resume(rdev);
4782         if (r)
4783                 return r;
4784         r = cik_cp_compute_resume(rdev);
4785         if (r)
4786                 return r;
4787
4788         cik_enable_gui_idle_interrupt(rdev, true);
4789
4790         return 0;
4791 }
4792
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used to
 * aid debugging before and after a GPU soft reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4832
4833 /**
4834  * cik_gpu_check_soft_reset - check which blocks are busy
4835  *
4836  * @rdev: radeon_device pointer
4837  *
4838  * Check which blocks are busy and return the relevant reset
4839  * mask to be used by cik_gpu_soft_reset().
4840  * Returns a mask of the blocks to be reset.
4841  */
4842 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843 {
4844         u32 reset_mask = 0;
4845         u32 tmp;
4846
4847         /* GRBM_STATUS */
4848         tmp = RREG32(GRBM_STATUS);
4849         if (tmp & (PA_BUSY | SC_BUSY |
4850                    BCI_BUSY | SX_BUSY |
4851                    TA_BUSY | VGT_BUSY |
4852                    DB_BUSY | CB_BUSY |
4853                    GDS_BUSY | SPI_BUSY |
4854                    IA_BUSY | IA_BUSY_NO_DMA))
4855                 reset_mask |= RADEON_RESET_GFX;
4856
4857         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858                 reset_mask |= RADEON_RESET_CP;
4859
4860         /* GRBM_STATUS2 */
4861         tmp = RREG32(GRBM_STATUS2);
4862         if (tmp & RLC_BUSY)
4863                 reset_mask |= RADEON_RESET_RLC;
4864
4865         /* SDMA0_STATUS_REG */
4866         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867         if (!(tmp & SDMA_IDLE))
4868                 reset_mask |= RADEON_RESET_DMA;
4869
4870         /* SDMA1_STATUS_REG */
4871         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872         if (!(tmp & SDMA_IDLE))
4873                 reset_mask |= RADEON_RESET_DMA1;
4874
4875         /* SRBM_STATUS2 */
4876         tmp = RREG32(SRBM_STATUS2);
4877         if (tmp & SDMA_BUSY)
4878                 reset_mask |= RADEON_RESET_DMA;
4879
4880         if (tmp & SDMA1_BUSY)
4881                 reset_mask |= RADEON_RESET_DMA1;
4882
4883         /* SRBM_STATUS */
4884         tmp = RREG32(SRBM_STATUS);
4885
4886         if (tmp & IH_BUSY)
4887                 reset_mask |= RADEON_RESET_IH;
4888
4889         if (tmp & SEM_BUSY)
4890                 reset_mask |= RADEON_RESET_SEM;
4891
4892         if (tmp & GRBM_RQ_PENDING)
4893                 reset_mask |= RADEON_RESET_GRBM;
4894
4895         if (tmp & VMC_BUSY)
4896                 reset_mask |= RADEON_RESET_VMC;
4897
4898         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899                    MCC_BUSY | MCD_BUSY))
4900                 reset_mask |= RADEON_RESET_MC;
4901
4902         if (evergreen_is_display_hung(rdev))
4903                 reset_mask |= RADEON_RESET_DISPLAY;
4904
4905         /* Skip MC reset as it's mostly likely not hung, just busy */
4906         if (reset_mask & RADEON_RESET_MC) {
4907                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908                 reset_mask &= ~RADEON_RESET_MC;
4909         }
4910
4911         return reset_mask;
4912 }
4913
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask
 * (as computed by cik_gpu_check_soft_reset()).
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
        struct evergreen_mc_save save;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if (reset_mask == 0)
                return;

        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

        cik_print_gpu_status_regs(rdev);
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

        /* disable CG/PG */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* stop the rlc */
        cik_rlc_stop(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        /* halt the SDMA engines that are going to be reset */
        if (reset_mask & RADEON_RESET_DMA) {
                /* sdma0 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        }
        if (reset_mask & RADEON_RESET_DMA1) {
                /* sdma1 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        }

        /* quiesce the memory controller before resetting blocks */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }

        /* translate the radeon reset mask into GRBM/SRBM soft-reset bits */
        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
                grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

        if (reset_mask & RADEON_RESET_CP) {
                grbm_soft_reset |= SOFT_RESET_CP;

                srbm_soft_reset |= SOFT_RESET_GRBM;
        }

        if (reset_mask & RADEON_RESET_DMA)
                srbm_soft_reset |= SOFT_RESET_SDMA;

        if (reset_mask & RADEON_RESET_DMA1)
                srbm_soft_reset |= SOFT_RESET_SDMA1;

        if (reset_mask & RADEON_RESET_DISPLAY)
                srbm_soft_reset |= SOFT_RESET_DC;

        if (reset_mask & RADEON_RESET_RLC)
                grbm_soft_reset |= SOFT_RESET_RLC;

        if (reset_mask & RADEON_RESET_SEM)
                srbm_soft_reset |= SOFT_RESET_SEM;

        if (reset_mask & RADEON_RESET_IH)
                srbm_soft_reset |= SOFT_RESET_IH;

        if (reset_mask & RADEON_RESET_GRBM)
                srbm_soft_reset |= SOFT_RESET_GRBM;

        if (reset_mask & RADEON_RESET_VMC)
                srbm_soft_reset |= SOFT_RESET_VMC;

        /* no MC soft reset on IGPs (carved-out system memory) */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                if (reset_mask & RADEON_RESET_MC)
                        srbm_soft_reset |= SOFT_RESET_MC;
        }

        /* assert the reset bits, let them settle, then deassert; the extra
         * reads after each write flush the register write */
        if (grbm_soft_reset) {
                tmp = RREG32(GRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                tmp = RREG32(SRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        evergreen_mc_resume(rdev, &save);
        udelay(50);

        cik_print_gpu_status_regs(rdev);
}
5044
/* GMCON state saved/restored around a pci config reset on KV/KB APUs */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5050
/**
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Save the GMCON registers that the reset clobbers (KV/KB APUs only),
 * then quiesce the MC render engine while the reset is in flight.
 * Restored by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* stash the registers the reset will clobber */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* keep the RENG from running on power-up or register update and
	 * disable stutter mode until the state is restored */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5062
5063 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064                                       struct kv_reset_save_regs *save)
5065 {
5066         int i;
5067
5068         WREG32(GMCON_PGFSM_WRITE, 0);
5069         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070
5071         for (i = 0; i < 5; i++)
5072                 WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074         WREG32(GMCON_PGFSM_WRITE, 0);
5075         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076
5077         for (i = 0; i < 5; i++)
5078                 WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082
5083         for (i = 0; i < 5; i++)
5084                 WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088
5089         for (i = 0; i < 5; i++)
5090                 WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094
5095         for (i = 0; i < 5; i++)
5096                 WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098         WREG32(GMCON_PGFSM_WRITE, 0);
5099         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100
5101         for (i = 0; i < 5; i++)
5102                 WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106
5107         for (i = 0; i < 5; i++)
5108                 WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112
5113         for (i = 0; i < 5; i++)
5114                 WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118
5119         for (i = 0; i < 5; i++)
5120                 WREG32(GMCON_PGFSM_WRITE, 0);
5121
5122         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124
5125         for (i = 0; i < 5; i++)
5126                 WREG32(GMCON_PGFSM_WRITE, 0);
5127
5128         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130
5131         WREG32(GMCON_MISC3, save->gmcon_misc3);
5132         WREG32(GMCON_MISC, save->gmcon_misc);
5133         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134 }
5135
/**
 * cik_gpu_pci_config_reset - full asic reset via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt all engines, quiesce the memory controller, then reset the
 * whole GPU through its pci config registers (CIK).  On APUs the
 * GMCON state is saved and restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt both SDMA engines (read-modify-write of the halt bit) */
	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs need the GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	/* CONFIG_MEMSIZE reads all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5198
5199 /**
5200  * cik_asic_reset - soft reset GPU
5201  *
5202  * @rdev: radeon_device pointer
5203  * @hard: force hard reset
5204  *
5205  * Look up which blocks are hung and attempt
5206  * to reset them.
5207  * Returns 0 for success.
5208  */
5209 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210 {
5211         u32 reset_mask;
5212
5213         if (hard) {
5214                 cik_gpu_pci_config_reset(rdev);
5215                 return 0;
5216         }
5217
5218         reset_mask = cik_gpu_check_soft_reset(rdev);
5219
5220         if (reset_mask)
5221                 r600_set_bios_scratch_engine_hung(rdev, true);
5222
5223         /* try soft reset */
5224         cik_gpu_soft_reset(rdev, reset_mask);
5225
5226         reset_mask = cik_gpu_check_soft_reset(rdev);
5227
5228         /* try pci config reset */
5229         if (reset_mask && radeon_hard_reset)
5230                 cik_gpu_pci_config_reset(rdev);
5231
5232         reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234         if (!reset_mask)
5235                 r600_set_bios_scratch_engine_hung(rdev, false);
5236
5237         return 0;
5238 }
5239
5240 /**
5241  * cik_gfx_is_lockup - check if the 3D engine is locked up
5242  *
5243  * @rdev: radeon_device pointer
5244  * @ring: radeon_ring structure holding ring information
5245  *
5246  * Check if the 3D engine is locked up (CIK).
5247  * Returns true if the engine is locked, false if not.
5248  */
5249 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250 {
5251         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252
5253         if (!(reset_mask & (RADEON_RESET_GFX |
5254                             RADEON_RESET_COMPUTE |
5255                             RADEON_RESET_CP))) {
5256                 radeon_ring_lockup_update(rdev, ring);
5257                 return false;
5258         }
5259         return radeon_ring_test_lockup(rdev, ring);
5260 }
5261
5262 /* MC */
5263 /**
5264  * cik_mc_program - program the GPU memory controller
5265  *
5266  * @rdev: radeon_device pointer
5267  *
5268  * Set the location of vram, gart, and AGP in the GPU's
5269  * physical address space (CIK).
5270  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* zero out 32 register groups starting at 0x2c14, stride 0x18;
	 * register names are not visible in this file — presumably HDP
	 * surface/tiling state, TODO confirm against cikd.h */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients before reprogramming the aperture */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end (16MB units) in bits 31:16, start in 15:0 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0, top == bottom, i.e. effectively unused */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5318
5319 /**
5320  * cik_mc_init - initialize the memory controller driver params
5321  *
5322  * @rdev: radeon_device pointer
5323  *
5324  * Look up the amount of vram, vram width, and decide how to place
5325  * vram and gart within the GPU's physical address space (CIK).
5326  * Returns 0 for success.
5327  */
5328 static int cik_mc_init(struct radeon_device *rdev)
5329 {
5330         u32 tmp;
5331         int chansize, numchan;
5332
5333         /* Get VRAM informations */
5334         rdev->mc.vram_is_ddr = true;
5335         tmp = RREG32(MC_ARB_RAMCFG);
5336         if (tmp & CHANSIZE_MASK) {
5337                 chansize = 64;
5338         } else {
5339                 chansize = 32;
5340         }
5341         tmp = RREG32(MC_SHARED_CHMAP);
5342         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343         case 0:
5344         default:
5345                 numchan = 1;
5346                 break;
5347         case 1:
5348                 numchan = 2;
5349                 break;
5350         case 2:
5351                 numchan = 4;
5352                 break;
5353         case 3:
5354                 numchan = 8;
5355                 break;
5356         case 4:
5357                 numchan = 3;
5358                 break;
5359         case 5:
5360                 numchan = 6;
5361                 break;
5362         case 6:
5363                 numchan = 10;
5364                 break;
5365         case 7:
5366                 numchan = 12;
5367                 break;
5368         case 8:
5369                 numchan = 16;
5370                 break;
5371         }
5372         rdev->mc.vram_width = numchan * chansize;
5373         /* Could aper size report 0 ? */
5374         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376         /* size in MB on si */
5377         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380         si_vram_gtt_location(rdev, &rdev->mc);
5381         radeon_update_bandwidth_info(rdev);
5382
5383         return 0;
5384 }
5385
5386 /*
5387  * GART
5388  * VMID 0 is the physical GPU addresses as used by the kernel.
5389  * VMIDs 1-15 are used for userspace clients and are handled
5390  * by the radeon vm/hsa code.
5391  */
5392 /**
5393  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394  *
5395  * @rdev: radeon_device pointer
5396  *
5397  * Flush the TLB for the VMID 0 page table (CIK).
5398  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	/* bit 0 only: invalidate just VMID 0, the kernel's GART context */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5407
5408 /**
5409  * cik_pcie_gart_enable - gart enable
5410  *
5411  * @rdev: radeon_device pointer
5412  *
5413  * This sets up the TLBs, programs the page tables for VMID0,
5414  * sets up the hw for VMIDs 1-15 which are allocated on
5415  * demand, and sets up the global locations for the LDS, GDS,
5416  * and GPUVM for FSA64 clients (CIK).
5417  * Returns 0 for success, errors for failure.
5418  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	/* VMID 0 covers the GART range and points at the pinned table;
	 * addresses programmed in 4KB units */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* undocumented register writes — meaning not visible in this
	 * file; TODO confirm against the register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* page table bases were saved by cik_pcie_gart_disable() */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* Kaveri only: make sure the VM is not bypassed */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5528
5529 /**
5530  * cik_pcie_gart_disable - gart disable
5531  *
5532  * @rdev: radeon_device pointer
5533  *
5534  * This disables all VM page table (CIK).
5535  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so cik_pcie_gart_enable()
	 * can restore them later */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5567
5568 /**
5569  * cik_pcie_gart_fini - vm fini callback
5570  *
5571  * @rdev: radeon_device pointer
5572  *
5573  * Tears down the driver GART/VM setup (CIK).
5574  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the VM contexts, then free the page table BO and the
	 * driver-side gart bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5581
5582 /* vm parser */
5583 /**
5584  * cik_ib_parse - vm ib_parse callback
5585  *
5586  * @rdev: radeon_device pointer
5587  * @ib: indirect buffer pointer
5588  *
5589  * CIK uses hw IB checking so this is a nop (CIK).
5590  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* hw IB checking makes sw validation unnecessary on CIK */
	return 0;
}
5595
5596 /*
5597  * vm
5598  * VMID 0 is the physical GPU addresses as used by the kernel.
5599  * VMIDs 1-15 are used for userspace clients and are handled
5600  * by the radeon vm/hsa code.
5601  */
5602 /**
5603  * cik_vm_init - cik vm init callback
5604  *
5605  * @rdev: radeon_device pointer
5606  *
5607  * Inits cik specific vm parameters (number of VMs, base of vram for
5608  * VMIDs 1-15) (CIK).
5609  * Returns 0 for success.
5610  */
5611 int cik_vm_init(struct radeon_device *rdev)
5612 {
5613         /*
5614          * number of VMs
5615          * VMID 0 is reserved for System
5616          * radeon graphics/compute will use VMIDs 1-15
5617          */
5618         rdev->vm_manager.nvm = 16;
5619         /* base offset of vram pages */
5620         if (rdev->flags & RADEON_IS_IGP) {
5621                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622                 tmp <<= 22;
5623                 rdev->vm_manager.vram_base_offset = tmp;
5624         } else
5625                 rdev->vm_manager.vram_base_offset = 0;
5626
5627         return 0;
5628 }
5629
5630 /**
5631  * cik_vm_fini - cik vm fini callback
5632  *
5633  * @rdev: radeon_device pointer
5634  *
5635  * Tear down any asic specific VM setup (CIK).
5636  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing asic-specific to tear down on CIK */
}
5640
5641 /**
5642  * cik_vm_decode_fault - print human readable fault info
5643  *
5644  * @rdev: radeon_device pointer
5645  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647  * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648  *
5649  * Print human readable fault information (CIK).
5650  */
5651 static void cik_vm_decode_fault(struct radeon_device *rdev,
5652                                 u32 status, u32 addr, u32 mc_client)
5653 {
5654         u32 mc_id;
5655         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659
5660         if (rdev->family == CHIP_HAWAII)
5661                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662         else
5663                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664
5665         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666                protections, vmid, addr,
5667                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668                block, mc_client, mc_id);
5669 }
5670
5671 /*
5672  * cik_vm_flush - cik vm flush using the CP
5673  *
5674  * Update the page table base and flush the VM TLB
5675  * using the CP (CIK).
5676  */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* the gfx ring can use the PFP engine for the writes; compute
	 * rings have no PFP */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page directory base for this VMID (4KB units) */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID's SRBM bank first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* write the four consecutive SH_MEM registers in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM bank back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5750
5751 /*
5752  * RLC
5753  * The RLC is a multi-purpose microengine that handles a
5754  * variety of functions, the most important of which is
5755  * the interrupt controller.
5756  */
5757 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758                                           bool enable)
5759 {
5760         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761
5762         if (enable)
5763                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764         else
5765                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766         WREG32(CP_INT_CNTL_RING0, tmp);
5767 }
5768
5769 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770 {
5771         u32 tmp;
5772
5773         tmp = RREG32(RLC_LB_CNTL);
5774         if (enable)
5775                 tmp |= LOAD_BALANCE_ENABLE;
5776         else
5777                 tmp &= ~LOAD_BALANCE_ENABLE;
5778         WREG32(RLC_LB_CNTL, tmp);
5779 }
5780
/* wait for the RLC serdes masters to go idle across all SEs/SHs */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll the CU master busy flag in every SE/SH combination */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SE/SH) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5805
5806 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807 {
5808         u32 tmp;
5809
5810         tmp = RREG32(RLC_CNTL);
5811         if (tmp != rlc)
5812                 WREG32(RLC_CNTL, rlc);
5813 }
5814
/* halt the RLC if running; returns the previous RLC_CNTL value so the
 * caller can restore it later via cik_update_rlc() */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		/* clear the enable bit and wait for the GPM to drain */
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		/* make sure the serdes masters have quiesced too */
		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5838
/* request RLC safe mode and wait for the RLC to acknowledge */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* send the enter-safe-mode message */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for gfx power and clock status to both report on */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to consume the request (REQ bit clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5859
5860 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861 {
5862         u32 tmp;
5863
5864         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865         WREG32(RLC_GPR_REG2, tmp);
5866 }
5867
5868 /**
5869  * cik_rlc_stop - stop the RLC ME
5870  *
5871  * @rdev: radeon_device pointer
5872  *
5873  * Halt the RLC ME (MicroEngine) (CIK).
5874  */
5875 static void cik_rlc_stop(struct radeon_device *rdev)
5876 {
5877         WREG32(RLC_CNTL, 0);
5878
5879         cik_enable_gui_idle_interrupt(rdev, false);
5880
5881         cik_wait_for_rlc_serdes(rdev);
5882 }
5883
5884 /**
5885  * cik_rlc_start - start the RLC ME
5886  *
5887  * @rdev: radeon_device pointer
5888  *
5889  * Unhalt the RLC ME (MicroEngine) (CIK).
5890  */
5891 static void cik_rlc_start(struct radeon_device *rdev)
5892 {
5893         WREG32(RLC_CNTL, RLC_ENABLE);
5894
5895         cik_enable_gui_idle_interrupt(rdev, true);
5896
5897         udelay(50);
5898 }
5899
5900 /**
5901  * cik_rlc_resume - setup the RLC hw
5902  *
5903  * @rdev: radeon_device pointer
5904  *
5905  * Initialize the RLC registers, load the ucode,
5906  * and start the RLC (CIK).
5907  * Returns 0 for success, -EINVAL if the ucode is not available.
5908  */
5909 static int cik_rlc_resume(struct radeon_device *rdev)
5910 {
5911         u32 i, size, tmp;
5912
5913         if (!rdev->rlc_fw)
5914                 return -EINVAL;
5915
5916         cik_rlc_stop(rdev);
5917
5918         /* disable CG */
5919         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5920         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921
5922         si_rlc_reset(rdev);
5923
5924         cik_init_pg(rdev);
5925
5926         cik_init_cg(rdev);
5927
5928         WREG32(RLC_LB_CNTR_INIT, 0);
5929         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930
5931         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933         WREG32(RLC_LB_PARAMS, 0x00600408);
5934         WREG32(RLC_LB_CNTL, 0x80000004);
5935
5936         WREG32(RLC_MC_CNTL, 0);
5937         WREG32(RLC_UCODE_CNTL, 0);
5938
5939         if (rdev->new_fw) {
5940                 const struct rlc_firmware_header_v1_0 *hdr =
5941                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942                 const __le32 *fw_data = (const __le32 *)
5943                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944
5945                 radeon_ucode_print_rlc_hdr(&hdr->header);
5946
5947                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5949                 for (i = 0; i < size; i++)
5950                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952         } else {
5953                 const __be32 *fw_data;
5954
5955                 switch (rdev->family) {
5956                 case CHIP_BONAIRE:
5957                 case CHIP_HAWAII:
5958                 default:
5959                         size = BONAIRE_RLC_UCODE_SIZE;
5960                         break;
5961                 case CHIP_KAVERI:
5962                         size = KV_RLC_UCODE_SIZE;
5963                         break;
5964                 case CHIP_KABINI:
5965                         size = KB_RLC_UCODE_SIZE;
5966                         break;
5967                 case CHIP_MULLINS:
5968                         size = ML_RLC_UCODE_SIZE;
5969                         break;
5970                 }
5971
5972                 fw_data = (const __be32 *)rdev->rlc_fw->data;
5973                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5974                 for (i = 0; i < size; i++)
5975                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5977         }
5978
5979         /* XXX - find out what chips support lbpw */
5980         cik_enable_lbpw(rdev, false);
5981
5982         if (rdev->family == CHIP_BONAIRE)
5983                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984
5985         cik_rlc_start(rdev);
5986
5987         return 0;
5988 }
5989
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to every SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* four back-to-back dummy reads — presumably to let the CB
		 * sclk gating state settle before disabling; values are
		 * intentionally discarded (TODO confirm against HW docs) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register when the enable bits actually change */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6025
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep / CGTS features for GFX.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to every SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure CGTS shader-complex gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep if it was on */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6105
/* Memory-controller/ATC/VM registers that each carry the per-block
 * clock-gating (MC_CG_ENABLE) and light-sleep (MC_LS_ENABLE) bits.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6118
6119 static void cik_enable_mc_ls(struct radeon_device *rdev,
6120                              bool enable)
6121 {
6122         int i;
6123         u32 orig, data;
6124
6125         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126                 orig = data = RREG32(mc_cg_registers[i]);
6127                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128                         data |= MC_LS_ENABLE;
6129                 else
6130                         data &= ~MC_LS_ENABLE;
6131                 if (data != orig)
6132                         WREG32(mc_cg_registers[i], data);
6133         }
6134 }
6135
6136 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137                                bool enable)
6138 {
6139         int i;
6140         u32 orig, data;
6141
6142         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143                 orig = data = RREG32(mc_cg_registers[i]);
6144                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145                         data |= MC_CG_ENABLE;
6146                 else
6147                         data &= ~MC_CG_ENABLE;
6148                 if (data != orig)
6149                         WREG32(mc_cg_registers[i], data);
6150         }
6151 }
6152
6153 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154                                  bool enable)
6155 {
6156         u32 orig, data;
6157
6158         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161         } else {
6162                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163                 data |= 0xff000000;
6164                 if (data != orig)
6165                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166
6167                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168                 data |= 0xff000000;
6169                 if (data != orig)
6170                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171         }
6172 }
6173
6174 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175                                  bool enable)
6176 {
6177         u32 orig, data;
6178
6179         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181                 data |= 0x100;
6182                 if (orig != data)
6183                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184
6185                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186                 data |= 0x100;
6187                 if (orig != data)
6188                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189         } else {
6190                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191                 data &= ~0x100;
6192                 if (orig != data)
6193                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194
6195                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196                 data &= ~0x100;
6197                 if (orig != data)
6198                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199         }
6200 }
6201
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is immediately discarded;
		 * since the disable path clears only the low 12 bits, this may
		 * have been meant as "data |= 0xfff" — confirm against the UVD
		 * clock-gating docs before changing. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* turn on dynamic clocking mode */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the memory CG enables */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* turn off dynamic clocking mode */
		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6227
6228 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229                                bool enable)
6230 {
6231         u32 orig, data;
6232
6233         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234
6235         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238         else
6239                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241
6242         if (orig != data)
6243                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244 }
6245
6246 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247                                 bool enable)
6248 {
6249         u32 orig, data;
6250
6251         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252
6253         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254                 data &= ~CLOCK_GATING_DIS;
6255         else
6256                 data |= CLOCK_GATING_DIS;
6257
6258         if (orig != data)
6259                 WREG32(HDP_HOST_PATH_CNTL, data);
6260 }
6261
6262 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263                               bool enable)
6264 {
6265         u32 orig, data;
6266
6267         orig = data = RREG32(HDP_MEM_POWER_LS);
6268
6269         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270                 data |= HDP_LS_ENABLE;
6271         else
6272                 data &= ~HDP_LS_ENABLE;
6273
6274         if (orig != data)
6275                 WREG32(HDP_MEM_POWER_LS, data);
6276 }
6277
6278 void cik_update_cg(struct radeon_device *rdev,
6279                    u32 block, bool enable)
6280 {
6281
6282         if (block & RADEON_CG_BLOCK_GFX) {
6283                 cik_enable_gui_idle_interrupt(rdev, false);
6284                 /* order matters! */
6285                 if (enable) {
6286                         cik_enable_mgcg(rdev, true);
6287                         cik_enable_cgcg(rdev, true);
6288                 } else {
6289                         cik_enable_cgcg(rdev, false);
6290                         cik_enable_mgcg(rdev, false);
6291                 }
6292                 cik_enable_gui_idle_interrupt(rdev, true);
6293         }
6294
6295         if (block & RADEON_CG_BLOCK_MC) {
6296                 if (!(rdev->flags & RADEON_IS_IGP)) {
6297                         cik_enable_mc_mgcg(rdev, enable);
6298                         cik_enable_mc_ls(rdev, enable);
6299                 }
6300         }
6301
6302         if (block & RADEON_CG_BLOCK_SDMA) {
6303                 cik_enable_sdma_mgcg(rdev, enable);
6304                 cik_enable_sdma_mgls(rdev, enable);
6305         }
6306
6307         if (block & RADEON_CG_BLOCK_BIF) {
6308                 cik_enable_bif_mgls(rdev, enable);
6309         }
6310
6311         if (block & RADEON_CG_BLOCK_UVD) {
6312                 if (rdev->has_uvd)
6313                         cik_enable_uvd_mgcg(rdev, enable);
6314         }
6315
6316         if (block & RADEON_CG_BLOCK_HDP) {
6317                 cik_enable_hdp_mgcg(rdev, enable);
6318                 cik_enable_hdp_ls(rdev, enable);
6319         }
6320
6321         if (block & RADEON_CG_BLOCK_VCE) {
6322                 vce_v2_0_enable_mgcg(rdev, enable);
6323         }
6324 }
6325
6326 static void cik_init_cg(struct radeon_device *rdev)
6327 {
6328
6329         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330
6331         if (rdev->has_uvd)
6332                 si_init_uvd_internal_cg(rdev);
6333
6334         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335                              RADEON_CG_BLOCK_SDMA |
6336                              RADEON_CG_BLOCK_BIF |
6337                              RADEON_CG_BLOCK_UVD |
6338                              RADEON_CG_BLOCK_HDP), true);
6339 }
6340
6341 static void cik_fini_cg(struct radeon_device *rdev)
6342 {
6343         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344                              RADEON_CG_BLOCK_SDMA |
6345                              RADEON_CG_BLOCK_BIF |
6346                              RADEON_CG_BLOCK_UVD |
6347                              RADEON_CG_BLOCK_HDP), false);
6348
6349         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350 }
6351
6352 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353                                           bool enable)
6354 {
6355         u32 data, orig;
6356
6357         orig = data = RREG32(RLC_PG_CNTL);
6358         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360         else
6361                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362         if (orig != data)
6363                 WREG32(RLC_PG_CNTL, data);
6364 }
6365
6366 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367                                           bool enable)
6368 {
6369         u32 data, orig;
6370
6371         orig = data = RREG32(RLC_PG_CNTL);
6372         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374         else
6375                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376         if (orig != data)
6377                 WREG32(RLC_PG_CNTL, data);
6378 }
6379
6380 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381 {
6382         u32 data, orig;
6383
6384         orig = data = RREG32(RLC_PG_CNTL);
6385         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386                 data &= ~DISABLE_CP_PG;
6387         else
6388                 data |= DISABLE_CP_PG;
6389         if (orig != data)
6390                 WREG32(RLC_PG_CNTL, data);
6391 }
6392
6393 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394 {
6395         u32 data, orig;
6396
6397         orig = data = RREG32(RLC_PG_CNTL);
6398         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399                 data &= ~DISABLE_GDS_PG;
6400         else
6401                 data |= DISABLE_GDS_PG;
6402         if (orig != data)
6403                 WREG32(RLC_PG_CNTL, data);
6404 }
6405
/* legacy-firmware jump-table layout: size in dwords and offsets into
 * the ucode image for the ME and MEC jump tables */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP jump tables (CE, PFP, ME, MEC, and on Kaveri MEC2) from
 * the firmware images into the RLC's cp_table buffer, back to back.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri has a second MEC, hence a fifth table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: per-image header carries the
			 * jump-table offset/size; payload is little-endian */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table size/offsets, payload
			 * is big-endian */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6493
/* Enable/disable GFX coarse-grain powergating and the RLC auto-PG path. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* dummy read whose result is discarded — presumably forces
		 * the GFX block awake after PG is disabled; confirm intent
		 * before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6523
6524 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525 {
6526         u32 mask = 0, tmp, tmp1;
6527         int i;
6528
6529         cik_select_se_sh(rdev, se, sh);
6530         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533
6534         tmp &= 0xffff0000;
6535
6536         tmp |= tmp1;
6537         tmp >>= 16;
6538
6539         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540                 mask <<= 1;
6541                 mask |= 1;
6542         }
6543
6544         return (~tmp) & mask;
6545 }
6546
6547 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548 {
6549         u32 i, j, k, active_cu_number = 0;
6550         u32 mask, counter, cu_bitmap;
6551         u32 tmp = 0;
6552
6553         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555                         mask = 1;
6556                         cu_bitmap = 0;
6557                         counter = 0;
6558                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560                                         if (counter < 2)
6561                                                 cu_bitmap |= mask;
6562                                         counter ++;
6563                                 }
6564                                 mask <<= 1;
6565                         }
6566
6567                         active_cu_number += counter;
6568                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6569                 }
6570         }
6571
6572         WREG32(RLC_PG_AO_CU_MASK, tmp);
6573
6574         tmp = RREG32(RLC_MAX_PG_CU);
6575         tmp &= ~MAX_PU_CU_MASK;
6576         tmp |= MAX_PU_CU(active_cu_number);
6577         WREG32(RLC_MAX_PG_CU, tmp);
6578 }
6579
6580 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581                                        bool enable)
6582 {
6583         u32 data, orig;
6584
6585         orig = data = RREG32(RLC_PG_CNTL);
6586         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587                 data |= STATIC_PER_CU_PG_ENABLE;
6588         else
6589                 data &= ~STATIC_PER_CU_PG_ENABLE;
6590         if (orig != data)
6591                 WREG32(RLC_PG_CNTL, data);
6592 }
6593
6594 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595                                         bool enable)
6596 {
6597         u32 data, orig;
6598
6599         orig = data = RREG32(RLC_PG_CNTL);
6600         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601                 data |= DYN_PER_CU_PG_ENABLE;
6602         else
6603                 data &= ~DYN_PER_CU_PG_ENABLE;
6604         if (orig != data)
6605                 WREG32(RLC_PG_CNTL, data);
6606 }
6607
/* dword offsets into the RLC GPM scratch area */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch area with the clear-state descriptor and the
 * save/restore register list, then configure PG delays and thresholds.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: address high, address low, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: write an all-zero descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* point the RLC at the save/restore and cp table buffers (256B aligned) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6659
/**
 * cik_update_gfx_pg - enable/disable gfx power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Toggle the CG and static/dynamic MG gfx power gating helpers
 * together, in that order (CIK).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6666
6667 u32 cik_get_csb_size(struct radeon_device *rdev)
6668 {
6669         u32 count = 0;
6670         const struct cs_section_def *sect = NULL;
6671         const struct cs_extent_def *ext = NULL;
6672
6673         if (rdev->rlc.cs_data == NULL)
6674                 return 0;
6675
6676         /* begin clear state */
6677         count += 2;
6678         /* context control state */
6679         count += 3;
6680
6681         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6683                         if (sect->id == SECT_CONTEXT)
6684                                 count += 2 + ext->reg_count;
6685                         else
6686                                 return 0;
6687                 }
6688         }
6689         /* pa_sc_raster_config/pa_sc_raster_config1 */
6690         count += 4;
6691         /* end clear state */
6692         count += 2;
6693         /* clear state */
6694         count += 2;
6695
6696         return count;
6697 }
6698
6699 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700 {
6701         u32 count = 0, i;
6702         const struct cs_section_def *sect = NULL;
6703         const struct cs_extent_def *ext = NULL;
6704
6705         if (rdev->rlc.cs_data == NULL)
6706                 return;
6707         if (buffer == NULL)
6708                 return;
6709
6710         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712
6713         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714         buffer[count++] = cpu_to_le32(0x80000000);
6715         buffer[count++] = cpu_to_le32(0x80000000);
6716
6717         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6719                         if (sect->id == SECT_CONTEXT) {
6720                                 buffer[count++] =
6721                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723                                 for (i = 0; i < ext->reg_count; i++)
6724                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6725                         } else {
6726                                 return;
6727                         }
6728                 }
6729         }
6730
6731         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733         switch (rdev->family) {
6734         case CHIP_BONAIRE:
6735                 buffer[count++] = cpu_to_le32(0x16000012);
6736                 buffer[count++] = cpu_to_le32(0x00000000);
6737                 break;
6738         case CHIP_KAVERI:
6739                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740                 buffer[count++] = cpu_to_le32(0x00000000);
6741                 break;
6742         case CHIP_KABINI:
6743         case CHIP_MULLINS:
6744                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745                 buffer[count++] = cpu_to_le32(0x00000000);
6746                 break;
6747         case CHIP_HAWAII:
6748                 buffer[count++] = cpu_to_le32(0x3a00161a);
6749                 buffer[count++] = cpu_to_le32(0x0000002e);
6750                 break;
6751         default:
6752                 buffer[count++] = cpu_to_le32(0x00000000);
6753                 buffer[count++] = cpu_to_le32(0x00000000);
6754                 break;
6755         }
6756
6757         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759
6760         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761         buffer[count++] = cpu_to_le32(0);
6762 }
6763
/**
 * cik_init_pg - initialize power gating
 *
 * @rdev: radeon_device pointer
 *
 * If any PG feature is enabled in pg_flags, enable sck slowdown on
 * power up/down, set up the gfx PG state (plus CP/GDS gating when
 * GFX_PG is supported), program the always-on CU mask, and finally
 * enable gfx power gating (CIK).
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        /* RLC setup must precede enabling CP/GDS gating */
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6778
/**
 * cik_fini_pg - tear down power gating
 *
 * @rdev: radeon_device pointer
 *
 * Disable gfx power gating and, when GFX_PG is supported, CP and
 * GDS gating as well.  Mirrors cik_init_pg() in reverse order (CIK).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6789
6790 /*
6791  * Interrupts
6792  * Starting with r6xx, interrupts are handled via a ring buffer.
6793  * Ring buffers are areas of GPU accessible memory that the GPU
6794  * writes interrupt vectors into and the host reads vectors out of.
6795  * There is a rptr (read pointer) that determines where the
6796  * host is currently reading, and a wptr (write pointer)
6797  * which determines where the GPU has written.  When the
6798  * pointers are equal, the ring is idle.  When the GPU
6799  * writes vectors to the ring buffer, it increments the
6800  * wptr.  When there is an interrupt, the host then starts
6801  * fetching commands and processing them until the pointers are
6802  * equal again at which point it updates the rptr.
6803  */
6804
6805 /**
6806  * cik_enable_interrupts - Enable the interrupt ring buffer
6807  *
6808  * @rdev: radeon_device pointer
6809  *
6810  * Enable the interrupt ring buffer (CIK).
6811  */
6812 static void cik_enable_interrupts(struct radeon_device *rdev)
6813 {
6814         u32 ih_cntl = RREG32(IH_CNTL);
6815         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816
6817         ih_cntl |= ENABLE_INTR;
6818         ih_rb_cntl |= IH_RB_ENABLE;
6819         WREG32(IH_CNTL, ih_cntl);
6820         WREG32(IH_RB_CNTL, ih_rb_cntl);
6821         rdev->ih.enabled = true;
6822 }
6823
6824 /**
6825  * cik_disable_interrupts - Disable the interrupt ring buffer
6826  *
6827  * @rdev: radeon_device pointer
6828  *
6829  * Disable the interrupt ring buffer (CIK).
6830  */
6831 static void cik_disable_interrupts(struct radeon_device *rdev)
6832 {
6833         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834         u32 ih_cntl = RREG32(IH_CNTL);
6835
6836         ih_rb_cntl &= ~IH_RB_ENABLE;
6837         ih_cntl &= ~ENABLE_INTR;
6838         WREG32(IH_RB_CNTL, ih_rb_cntl);
6839         WREG32(IH_CNTL, ih_cntl);
6840         /* set rptr, wptr to 0 */
6841         WREG32(IH_RB_RPTR, 0);
6842         WREG32(IH_RB_WPTR, 0);
6843         rdev->ih.enabled = false;
6844         rdev->ih.rptr = 0;
6845 }
6846
6847 /**
6848  * cik_disable_interrupt_state - Disable all interrupt sources
6849  *
6850  * @rdev: radeon_device pointer
6851  *
6852  * Clear all interrupt enable bits used by the driver (CIK).
6853  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring: keep only the context busy/empty enables, clear the rest */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        /* sdma */
        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* compute queues: both MEs, all four pipes each */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* SRBM */
        WREG32(SRBM_INT_CNTL, 0);
        /* vline/vblank, etc. */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }
        /* pflip */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug: clear everything except the polarity bit */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6923
6924 /**
6925  * cik_irq_init - init and enable the interrupt ring
6926  *
6927  * @rdev: radeon_device pointer
6928  *
6929  * Allocate a ring buffer for the interrupt controller,
6930  * enable the RLC, disable interrupts, enable the IH
6931  * ring buffer and enable it (CIK).
6932  * Called at device load and reume.
6933  * Returns 0 for success, errors for failure.
6934  */
6935 static int cik_irq_init(struct radeon_device *rdev)
6936 {
6937         int ret = 0;
6938         int rb_bufsz;
6939         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940
6941         /* allocate ring */
6942         ret = r600_ih_ring_alloc(rdev);
6943         if (ret)
6944                 return ret;
6945
6946         /* disable irqs */
6947         cik_disable_interrupts(rdev);
6948
6949         /* init rlc */
6950         ret = cik_rlc_resume(rdev);
6951         if (ret) {
6952                 r600_ih_ring_fini(rdev);
6953                 return ret;
6954         }
6955
6956         /* setup interrupt control */
6957         /* set dummy read address to dummy page address */
6958         WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962          */
6963         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967
6968         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6969         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970
6971         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972                       IH_WPTR_OVERFLOW_CLEAR |
6973                       (rb_bufsz << 1));
6974
6975         if (rdev->wb.enabled)
6976                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977
6978         /* set the writeback address whether it's enabled or not */
6979         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981
6982         WREG32(IH_RB_CNTL, ih_rb_cntl);
6983
6984         /* set rptr, wptr to 0 */
6985         WREG32(IH_RB_RPTR, 0);
6986         WREG32(IH_RB_WPTR, 0);
6987
6988         /* Default settings for IH_CNTL (disabled at first) */
6989         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990         /* RPTR_REARM only works if msi's are enabled */
6991         if (rdev->msi_enabled)
6992                 ih_cntl |= RPTR_REARM;
6993         WREG32(IH_CNTL, ih_cntl);
6994
6995         /* force the active interrupt state to all disabled */
6996         cik_disable_interrupt_state(rdev);
6997
6998         pci_set_master(rdev->pdev);
6999
7000         /* enable irqs */
7001         cik_enable_interrupts(rdev);
7002
7003         return ret;
7004 }
7005
7006 /**
7007  * cik_irq_set - enable/disable interrupt sources
7008  *
7009  * @rdev: radeon_device pointer
7010  *
7011  * Enable interrupt sources on the GPU (vblanks, hpd,
7012  * etc.) (CIK).
7013  * Returns 0 for success, errors for failure.
7014  */
7015 int cik_irq_set(struct radeon_device *rdev)
7016 {
7017         u32 cp_int_cntl;
7018         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022         u32 grbm_int_cntl = 0;
7023         u32 dma_cntl, dma_cntl1;
7024
7025         if (!rdev->irq.installed) {
7026                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027                 return -EINVAL;
7028         }
7029         /* don't enable anything if the ih is disabled */
7030         if (!rdev->ih.enabled) {
7031                 cik_disable_interrupts(rdev);
7032                 /* force the active interrupt state to all disabled */
7033                 cik_disable_interrupt_state(rdev);
7034                 return 0;
7035         }
7036
7037         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040
7041         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047
7048         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050
7051         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059
7060         /* enable CP interrupts on all rings */
7061         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064         }
7065         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7068                 if (ring->me == 1) {
7069                         switch (ring->pipe) {
7070                         case 0:
7071                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072                                 break;
7073                         case 1:
7074                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075                                 break;
7076                         case 2:
7077                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078                                 break;
7079                         case 3:
7080                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7081                                 break;
7082                         default:
7083                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084                                 break;
7085                         }
7086                 } else if (ring->me == 2) {
7087                         switch (ring->pipe) {
7088                         case 0:
7089                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090                                 break;
7091                         case 1:
7092                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093                                 break;
7094                         case 2:
7095                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096                                 break;
7097                         case 3:
7098                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7099                                 break;
7100                         default:
7101                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102                                 break;
7103                         }
7104                 } else {
7105                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106                 }
7107         }
7108         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7111                 if (ring->me == 1) {
7112                         switch (ring->pipe) {
7113                         case 0:
7114                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115                                 break;
7116                         case 1:
7117                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118                                 break;
7119                         case 2:
7120                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121                                 break;
7122                         case 3:
7123                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7124                                 break;
7125                         default:
7126                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127                                 break;
7128                         }
7129                 } else if (ring->me == 2) {
7130                         switch (ring->pipe) {
7131                         case 0:
7132                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133                                 break;
7134                         case 1:
7135                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136                                 break;
7137                         case 2:
7138                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139                                 break;
7140                         case 3:
7141                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7142                                 break;
7143                         default:
7144                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145                                 break;
7146                         }
7147                 } else {
7148                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149                 }
7150         }
7151
7152         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7154                 dma_cntl |= TRAP_ENABLE;
7155         }
7156
7157         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159                 dma_cntl1 |= TRAP_ENABLE;
7160         }
7161
7162         if (rdev->irq.crtc_vblank_int[0] ||
7163             atomic_read(&rdev->irq.pflip[0])) {
7164                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7165                 crtc1 |= VBLANK_INTERRUPT_MASK;
7166         }
7167         if (rdev->irq.crtc_vblank_int[1] ||
7168             atomic_read(&rdev->irq.pflip[1])) {
7169                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7170                 crtc2 |= VBLANK_INTERRUPT_MASK;
7171         }
7172         if (rdev->irq.crtc_vblank_int[2] ||
7173             atomic_read(&rdev->irq.pflip[2])) {
7174                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7175                 crtc3 |= VBLANK_INTERRUPT_MASK;
7176         }
7177         if (rdev->irq.crtc_vblank_int[3] ||
7178             atomic_read(&rdev->irq.pflip[3])) {
7179                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7180                 crtc4 |= VBLANK_INTERRUPT_MASK;
7181         }
7182         if (rdev->irq.crtc_vblank_int[4] ||
7183             atomic_read(&rdev->irq.pflip[4])) {
7184                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7185                 crtc5 |= VBLANK_INTERRUPT_MASK;
7186         }
7187         if (rdev->irq.crtc_vblank_int[5] ||
7188             atomic_read(&rdev->irq.pflip[5])) {
7189                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7190                 crtc6 |= VBLANK_INTERRUPT_MASK;
7191         }
7192         if (rdev->irq.hpd[0]) {
7193                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7194                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195         }
7196         if (rdev->irq.hpd[1]) {
7197                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7198                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199         }
7200         if (rdev->irq.hpd[2]) {
7201                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7202                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203         }
7204         if (rdev->irq.hpd[3]) {
7205                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7206                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207         }
7208         if (rdev->irq.hpd[4]) {
7209                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7210                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211         }
7212         if (rdev->irq.hpd[5]) {
7213                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7214                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215         }
7216
7217         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218
7219         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221
7222         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230
7231         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232
7233         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235         if (rdev->num_crtc >= 4) {
7236                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238         }
7239         if (rdev->num_crtc >= 6) {
7240                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242         }
7243
7244         if (rdev->num_crtc >= 2) {
7245                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246                        GRPH_PFLIP_INT_MASK);
7247                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248                        GRPH_PFLIP_INT_MASK);
7249         }
7250         if (rdev->num_crtc >= 4) {
7251                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252                        GRPH_PFLIP_INT_MASK);
7253                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254                        GRPH_PFLIP_INT_MASK);
7255         }
7256         if (rdev->num_crtc >= 6) {
7257                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258                        GRPH_PFLIP_INT_MASK);
7259                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260                        GRPH_PFLIP_INT_MASK);
7261         }
7262
7263         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269
7270         /* posting read */
7271         RREG32(SRBM_STATUS);
7272
7273         return 0;
7274 }
7275
7276 /**
7277  * cik_irq_ack - ack interrupt sources
7278  *
7279  * @rdev: radeon_device pointer
7280  *
7281  * Ack interrupt sources on the GPU (vblanks, hpd,
7282  * etc.) (CIK).  Certain interrupts sources are sw
7283  * generated and do not require an explicit ack.
7284  */
7285 static inline void cik_irq_ack(struct radeon_device *rdev)
7286 {
7287         u32 tmp;
7288
7289         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296
7297         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7299         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7301         if (rdev->num_crtc >= 4) {
7302                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7304                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7306         }
7307         if (rdev->num_crtc >= 6) {
7308                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7310                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7312         }
7313
7314         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316                        GRPH_PFLIP_INT_CLEAR);
7317         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319                        GRPH_PFLIP_INT_CLEAR);
7320         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328
7329         if (rdev->num_crtc >= 4) {
7330                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332                                GRPH_PFLIP_INT_CLEAR);
7333                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335                                GRPH_PFLIP_INT_CLEAR);
7336                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344         }
7345
7346         if (rdev->num_crtc >= 6) {
7347                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349                                GRPH_PFLIP_INT_CLEAR);
7350                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352                                GRPH_PFLIP_INT_CLEAR);
7353                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361         }
7362
7363         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7365                 tmp |= DC_HPDx_INT_ACK;
7366                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7367         }
7368         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7370                 tmp |= DC_HPDx_INT_ACK;
7371                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7372         }
7373         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7375                 tmp |= DC_HPDx_INT_ACK;
7376                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7377         }
7378         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7380                 tmp |= DC_HPDx_INT_ACK;
7381                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7382         }
7383         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7385                 tmp |= DC_HPDx_INT_ACK;
7386                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7387         }
7388         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7390                 tmp |= DC_HPDx_INT_ACK;
7391                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7392         }
7393         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7395                 tmp |= DC_HPDx_RX_INT_ACK;
7396                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7397         }
7398         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7400                 tmp |= DC_HPDx_RX_INT_ACK;
7401                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7402         }
7403         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7405                 tmp |= DC_HPDx_RX_INT_ACK;
7406                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7407         }
7408         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7410                 tmp |= DC_HPDx_RX_INT_ACK;
7411                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7412         }
7413         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7415                 tmp |= DC_HPDx_RX_INT_ACK;
7416                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7417         }
7418         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7420                 tmp |= DC_HPDx_RX_INT_ACK;
7421                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7422         }
7423 }
7424
7425 /**
7426  * cik_irq_disable - disable interrupts
7427  *
7428  * @rdev: radeon_device pointer
7429  *
7430  * Disable interrupts on the hw (CIK).
7431  */
7432 static void cik_irq_disable(struct radeon_device *rdev)
7433 {
7434         cik_disable_interrupts(rdev);
7435         /* Wait and acknowledge irq */
7436         mdelay(1);
7437         cik_irq_ack(rdev);
7438         cik_disable_interrupt_state(rdev);
7439 }
7440
7441 /**
7442  * cik_irq_disable - disable interrupts for suspend
7443  *
7444  * @rdev: radeon_device pointer
7445  *
7446  * Disable interrupts and stop the RLC (CIK).
7447  * Used for suspend.
7448  */
7449 static void cik_irq_suspend(struct radeon_device *rdev)
7450 {
7451         cik_irq_disable(rdev);
7452         cik_rlc_stop(rdev);
7453 }
7454
7455 /**
7456  * cik_irq_fini - tear down interrupt support
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Disable interrupts on the hw and free the IH ring
7461  * buffer (CIK).
7462  * Used for driver unload.
7463  */
7464 static void cik_irq_fini(struct radeon_device *rdev)
7465 {
7466         cik_irq_suspend(rdev);
7467         r600_ih_ring_fini(rdev);
7468 }
7469
7470 /**
7471  * cik_get_ih_wptr - get the IH ring buffer wptr
7472  *
7473  * @rdev: radeon_device pointer
7474  *
7475  * Get the IH ring buffer wptr from either the register
7476  * or the writeback memory buffer (CIK).  Also check for
7477  * ring buffer overflow and deal with it.
7478  * Used by cik_irq_process().
7479  * Returns the value of the wptr.
7480  */
7481 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482 {
7483         u32 wptr, tmp;
7484
7485         if (rdev->wb.enabled)
7486                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487         else
7488                 wptr = RREG32(IH_RB_WPTR);
7489
7490         if (wptr & RB_OVERFLOW) {
7491                 wptr &= ~RB_OVERFLOW;
7492                 /* When a ring buffer overflow happen start parsing interrupt
7493                  * from the last not overwritten vector (wptr + 16). Hopefully
7494                  * this should allow us to catchup.
7495                  */
7496                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499                 tmp = RREG32(IH_RB_CNTL);
7500                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501                 WREG32(IH_RB_CNTL, tmp);
7502         }
7503         return (wptr & rdev->ih.ptr_mask);
7504 }
7505
7506 /*        CIK IV Ring
7507  * Each IV ring entry is 128 bits:
7508  * [7:0]    - interrupt source id
7509  * [31:8]   - reserved
7510  * [59:32]  - interrupt source data
7511  * [63:60]  - reserved
7512  * [71:64]  - RINGID
7513  *            CP:
7514  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518  *            PIPE_ID - ME0 0=3D
7519  *                    - ME1&2 compute dispatcher (4 pipes each)
7520  *            SDMA:
7521  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524  * [79:72]  - VMID
7525  * [95:80]  - PASID
7526  * [127:96] - reserved
7527  */
7528 /**
7529  * cik_irq_process - interrupt handler
7530  *
7531  * @rdev: radeon_device pointer
7532  *
7533  * Interrupt hander (CIK).  Walk the IH ring,
7534  * ack interrupts and schedule work to handle
7535  * interrupt events.
7536  * Returns irq process return code.
7537  */
7538 int cik_irq_process(struct radeon_device *rdev)
7539 {
7540         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542         u32 wptr;
7543         u32 rptr;
7544         u32 src_id, src_data, ring_id;
7545         u8 me_id, pipe_id, queue_id;
7546         u32 ring_index;
7547         bool queue_hotplug = false;
7548         bool queue_dp = false;
7549         bool queue_reset = false;
7550         u32 addr, status, mc_client;
7551         bool queue_thermal = false;
7552
7553         if (!rdev->ih.enabled || rdev->shutdown)
7554                 return IRQ_NONE;
7555
7556         wptr = cik_get_ih_wptr(rdev);
7557
7558 restart_ih:
7559         /* is somebody else already processing irqs? */
7560         if (atomic_xchg(&rdev->ih.lock, 1))
7561                 return IRQ_NONE;
7562
7563         rptr = rdev->ih.rptr;
7564         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565
7566         /* Order reading of wptr vs. reading of IH ring data */
7567         rmb();
7568
7569         /* display interrupts */
7570         cik_irq_ack(rdev);
7571
7572         while (rptr != wptr) {
7573                 /* wptr/rptr are in bytes! */
7574                 ring_index = rptr / 4;
7575
7576                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579
7580                 switch (src_id) {
7581                 case 1: /* D1 vblank/vline */
7582                         switch (src_data) {
7583                         case 0: /* D1 vblank */
7584                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586
7587                                 if (rdev->irq.crtc_vblank_int[0]) {
7588                                         drm_handle_vblank(rdev->ddev, 0);
7589                                         rdev->pm.vblank_sync = true;
7590                                         wake_up(&rdev->irq.vblank_queue);
7591                                 }
7592                                 if (atomic_read(&rdev->irq.pflip[0]))
7593                                         radeon_crtc_handle_vblank(rdev, 0);
7594                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595                                 DRM_DEBUG("IH: D1 vblank\n");
7596
7597                                 break;
7598                         case 1: /* D1 vline */
7599                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603                                 DRM_DEBUG("IH: D1 vline\n");
7604
7605                                 break;
7606                         default:
7607                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608                                 break;
7609                         }
7610                         break;
7611                 case 2: /* D2 vblank/vline */
7612                         switch (src_data) {
7613                         case 0: /* D2 vblank */
7614                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617                                 if (rdev->irq.crtc_vblank_int[1]) {
7618                                         drm_handle_vblank(rdev->ddev, 1);
7619                                         rdev->pm.vblank_sync = true;
7620                                         wake_up(&rdev->irq.vblank_queue);
7621                                 }
7622                                 if (atomic_read(&rdev->irq.pflip[1]))
7623                                         radeon_crtc_handle_vblank(rdev, 1);
7624                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625                                 DRM_DEBUG("IH: D2 vblank\n");
7626
7627                                 break;
7628                         case 1: /* D2 vline */
7629                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633                                 DRM_DEBUG("IH: D2 vline\n");
7634
7635                                 break;
7636                         default:
7637                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638                                 break;
7639                         }
7640                         break;
7641                 case 3: /* D3 vblank/vline */
7642                         switch (src_data) {
7643                         case 0: /* D3 vblank */
7644                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647                                 if (rdev->irq.crtc_vblank_int[2]) {
7648                                         drm_handle_vblank(rdev->ddev, 2);
7649                                         rdev->pm.vblank_sync = true;
7650                                         wake_up(&rdev->irq.vblank_queue);
7651                                 }
7652                                 if (atomic_read(&rdev->irq.pflip[2]))
7653                                         radeon_crtc_handle_vblank(rdev, 2);
7654                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655                                 DRM_DEBUG("IH: D3 vblank\n");
7656
7657                                 break;
7658                         case 1: /* D3 vline */
7659                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663                                 DRM_DEBUG("IH: D3 vline\n");
7664
7665                                 break;
7666                         default:
7667                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668                                 break;
7669                         }
7670                         break;
7671                 case 4: /* D4 vblank/vline */
7672                         switch (src_data) {
7673                         case 0: /* D4 vblank */
7674                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677                                 if (rdev->irq.crtc_vblank_int[3]) {
7678                                         drm_handle_vblank(rdev->ddev, 3);
7679                                         rdev->pm.vblank_sync = true;
7680                                         wake_up(&rdev->irq.vblank_queue);
7681                                 }
7682                                 if (atomic_read(&rdev->irq.pflip[3]))
7683                                         radeon_crtc_handle_vblank(rdev, 3);
7684                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685                                 DRM_DEBUG("IH: D4 vblank\n");
7686
7687                                 break;
7688                         case 1: /* D4 vline */
7689                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693                                 DRM_DEBUG("IH: D4 vline\n");
7694
7695                                 break;
7696                         default:
7697                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698                                 break;
7699                         }
7700                         break;
7701                 case 5: /* D5 vblank/vline */
7702                         switch (src_data) {
7703                         case 0: /* D5 vblank */
7704                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707                                 if (rdev->irq.crtc_vblank_int[4]) {
7708                                         drm_handle_vblank(rdev->ddev, 4);
7709                                         rdev->pm.vblank_sync = true;
7710                                         wake_up(&rdev->irq.vblank_queue);
7711                                 }
7712                                 if (atomic_read(&rdev->irq.pflip[4]))
7713                                         radeon_crtc_handle_vblank(rdev, 4);
7714                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715                                 DRM_DEBUG("IH: D5 vblank\n");
7716
7717                                 break;
7718                         case 1: /* D5 vline */
7719                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723                                 DRM_DEBUG("IH: D5 vline\n");
7724
7725                                 break;
7726                         default:
7727                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728                                 break;
7729                         }
7730                         break;
7731                 case 6: /* D6 vblank/vline */
7732                         switch (src_data) {
7733                         case 0: /* D6 vblank */
7734                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737                                 if (rdev->irq.crtc_vblank_int[5]) {
7738                                         drm_handle_vblank(rdev->ddev, 5);
7739                                         rdev->pm.vblank_sync = true;
7740                                         wake_up(&rdev->irq.vblank_queue);
7741                                 }
7742                                 if (atomic_read(&rdev->irq.pflip[5]))
7743                                         radeon_crtc_handle_vblank(rdev, 5);
7744                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745                                 DRM_DEBUG("IH: D6 vblank\n");
7746
7747                                 break;
7748                         case 1: /* D6 vline */
7749                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753                                 DRM_DEBUG("IH: D6 vline\n");
7754
7755                                 break;
7756                         default:
7757                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758                                 break;
7759                         }
7760                         break;
7761                 case 8: /* D1 page flip */
7762                 case 10: /* D2 page flip */
7763                 case 12: /* D3 page flip */
7764                 case 14: /* D4 page flip */
7765                 case 16: /* D5 page flip */
7766                 case 18: /* D6 page flip */
7767                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768                         if (radeon_use_pflipirq > 0)
7769                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770                         break;
7771                 case 42: /* HPD hotplug */
7772                         switch (src_data) {
7773                         case 0:
7774                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776
7777                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778                                 queue_hotplug = true;
7779                                 DRM_DEBUG("IH: HPD1\n");
7780
7781                                 break;
7782                         case 1:
7783                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785
7786                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787                                 queue_hotplug = true;
7788                                 DRM_DEBUG("IH: HPD2\n");
7789
7790                                 break;
7791                         case 2:
7792                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796                                 queue_hotplug = true;
7797                                 DRM_DEBUG("IH: HPD3\n");
7798
7799                                 break;
7800                         case 3:
7801                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805                                 queue_hotplug = true;
7806                                 DRM_DEBUG("IH: HPD4\n");
7807
7808                                 break;
7809                         case 4:
7810                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814                                 queue_hotplug = true;
7815                                 DRM_DEBUG("IH: HPD5\n");
7816
7817                                 break;
7818                         case 5:
7819                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823                                 queue_hotplug = true;
7824                                 DRM_DEBUG("IH: HPD6\n");
7825
7826                                 break;
7827                         case 6:
7828                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832                                 queue_dp = true;
7833                                 DRM_DEBUG("IH: HPD_RX 1\n");
7834
7835                                 break;
7836                         case 7:
7837                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841                                 queue_dp = true;
7842                                 DRM_DEBUG("IH: HPD_RX 2\n");
7843
7844                                 break;
7845                         case 8:
7846                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850                                 queue_dp = true;
7851                                 DRM_DEBUG("IH: HPD_RX 3\n");
7852
7853                                 break;
7854                         case 9:
7855                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859                                 queue_dp = true;
7860                                 DRM_DEBUG("IH: HPD_RX 4\n");
7861
7862                                 break;
7863                         case 10:
7864                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868                                 queue_dp = true;
7869                                 DRM_DEBUG("IH: HPD_RX 5\n");
7870
7871                                 break;
7872                         case 11:
7873                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877                                 queue_dp = true;
7878                                 DRM_DEBUG("IH: HPD_RX 6\n");
7879
7880                                 break;
7881                         default:
7882                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883                                 break;
7884                         }
7885                         break;
7886                 case 96:
7887                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888                         WREG32(SRBM_INT_ACK, 0x1);
7889                         break;
7890                 case 124: /* UVD */
7891                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893                         break;
7894                 case 146:
7895                 case 147:
7896                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899                         /* reset addr and status */
7900                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901                         if (addr == 0x0 && status == 0x0)
7902                                 break;
7903                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7905                                 addr);
7906                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907                                 status);
7908                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7909                         break;
7910                 case 167: /* VCE */
7911                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912                         switch (src_data) {
7913                         case 0:
7914                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915                                 break;
7916                         case 1:
7917                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918                                 break;
7919                         default:
7920                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921                                 break;
7922                         }
7923                         break;
7924                 case 176: /* GFX RB CP_INT */
7925                 case 177: /* GFX IB CP_INT */
7926                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927                         break;
7928                 case 181: /* CP EOP event */
7929                         DRM_DEBUG("IH: CP EOP\n");
7930                         /* XXX check the bitfield order! */
7931                         me_id = (ring_id & 0x60) >> 5;
7932                         pipe_id = (ring_id & 0x18) >> 3;
7933                         queue_id = (ring_id & 0x7) >> 0;
7934                         switch (me_id) {
7935                         case 0:
7936                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937                                 break;
7938                         case 1:
7939                         case 2:
7940                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7941                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7943                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944                                 break;
7945                         }
7946                         break;
7947                 case 184: /* CP Privileged reg access */
7948                         DRM_ERROR("Illegal register access in command stream\n");
7949                         /* XXX check the bitfield order! */
7950                         me_id = (ring_id & 0x60) >> 5;
7951                         pipe_id = (ring_id & 0x18) >> 3;
7952                         queue_id = (ring_id & 0x7) >> 0;
7953                         switch (me_id) {
7954                         case 0:
7955                                 /* This results in a full GPU reset, but all we need to do is soft
7956                                  * reset the CP for gfx
7957                                  */
7958                                 queue_reset = true;
7959                                 break;
7960                         case 1:
7961                                 /* XXX compute */
7962                                 queue_reset = true;
7963                                 break;
7964                         case 2:
7965                                 /* XXX compute */
7966                                 queue_reset = true;
7967                                 break;
7968                         }
7969                         break;
7970                 case 185: /* CP Privileged inst */
7971                         DRM_ERROR("Illegal instruction in command stream\n");
7972                         /* XXX check the bitfield order! */
7973                         me_id = (ring_id & 0x60) >> 5;
7974                         pipe_id = (ring_id & 0x18) >> 3;
7975                         queue_id = (ring_id & 0x7) >> 0;
7976                         switch (me_id) {
7977                         case 0:
7978                                 /* This results in a full GPU reset, but all we need to do is soft
7979                                  * reset the CP for gfx
7980                                  */
7981                                 queue_reset = true;
7982                                 break;
7983                         case 1:
7984                                 /* XXX compute */
7985                                 queue_reset = true;
7986                                 break;
7987                         case 2:
7988                                 /* XXX compute */
7989                                 queue_reset = true;
7990                                 break;
7991                         }
7992                         break;
7993                 case 224: /* SDMA trap event */
7994                         /* XXX check the bitfield order! */
7995                         me_id = (ring_id & 0x3) >> 0;
7996                         queue_id = (ring_id & 0xc) >> 2;
7997                         DRM_DEBUG("IH: SDMA trap\n");
7998                         switch (me_id) {
7999                         case 0:
8000                                 switch (queue_id) {
8001                                 case 0:
8002                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8003                                         break;
8004                                 case 1:
8005                                         /* XXX compute */
8006                                         break;
8007                                 case 2:
8008                                         /* XXX compute */
8009                                         break;
8010                                 }
8011                                 break;
8012                         case 1:
8013                                 switch (queue_id) {
8014                                 case 0:
8015                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8016                                         break;
8017                                 case 1:
8018                                         /* XXX compute */
8019                                         break;
8020                                 case 2:
8021                                         /* XXX compute */
8022                                         break;
8023                                 }
8024                                 break;
8025                         }
8026                         break;
8027                 case 230: /* thermal low to high */
8028                         DRM_DEBUG("IH: thermal low to high\n");
8029                         rdev->pm.dpm.thermal.high_to_low = false;
8030                         queue_thermal = true;
8031                         break;
8032                 case 231: /* thermal high to low */
8033                         DRM_DEBUG("IH: thermal high to low\n");
8034                         rdev->pm.dpm.thermal.high_to_low = true;
8035                         queue_thermal = true;
8036                         break;
8037                 case 233: /* GUI IDLE */
8038                         DRM_DEBUG("IH: GUI idle\n");
8039                         break;
8040                 case 241: /* SDMA Privileged inst */
8041                 case 247: /* SDMA Privileged inst */
8042                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8043                         /* XXX check the bitfield order! */
8044                         me_id = (ring_id & 0x3) >> 0;
8045                         queue_id = (ring_id & 0xc) >> 2;
8046                         switch (me_id) {
8047                         case 0:
8048                                 switch (queue_id) {
8049                                 case 0:
8050                                         queue_reset = true;
8051                                         break;
8052                                 case 1:
8053                                         /* XXX compute */
8054                                         queue_reset = true;
8055                                         break;
8056                                 case 2:
8057                                         /* XXX compute */
8058                                         queue_reset = true;
8059                                         break;
8060                                 }
8061                                 break;
8062                         case 1:
8063                                 switch (queue_id) {
8064                                 case 0:
8065                                         queue_reset = true;
8066                                         break;
8067                                 case 1:
8068                                         /* XXX compute */
8069                                         queue_reset = true;
8070                                         break;
8071                                 case 2:
8072                                         /* XXX compute */
8073                                         queue_reset = true;
8074                                         break;
8075                                 }
8076                                 break;
8077                         }
8078                         break;
8079                 default:
8080                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8081                         break;
8082                 }
8083
8084                 /* wptr/rptr are in bytes! */
8085                 rptr += 16;
8086                 rptr &= rdev->ih.ptr_mask;
8087                 WREG32(IH_RB_RPTR, rptr);
8088         }
8089         if (queue_dp)
8090                 schedule_work(&rdev->dp_work);
8091         if (queue_hotplug)
8092                 schedule_delayed_work(&rdev->hotplug_work, 0);
8093         if (queue_reset) {
8094                 rdev->needs_reset = true;
8095                 wake_up_all(&rdev->fence_queue);
8096         }
8097         if (queue_thermal)
8098                 schedule_work(&rdev->pm.dpm.thermal.work);
8099         rdev->ih.rptr = rptr;
8100         atomic_set(&rdev->ih.lock, 0);
8101
8102         /* make sure wptr hasn't changed while processing */
8103         wptr = cik_get_ih_wptr(rdev);
8104         if (wptr != rptr)
8105                 goto restart_ih;
8106
8107         return IRQ_HANDLED;
8108 }
8109
8110 /*
8111  * startup/shutdown callbacks
8112  */
8113 static void cik_uvd_init(struct radeon_device *rdev)
8114 {
8115         int r;
8116
8117         if (!rdev->has_uvd)
8118                 return;
8119
8120         r = radeon_uvd_init(rdev);
8121         if (r) {
8122                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8123                 /*
8124                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8125                  * to early fails cik_uvd_start() and thus nothing happens
8126                  * there. So it is pointless to try to go through that code
8127                  * hence why we disable uvd here.
8128                  */
8129                 rdev->has_uvd = false;
8130                 return;
8131         }
8132         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8133         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8134 }
8135
8136 static void cik_uvd_start(struct radeon_device *rdev)
8137 {
8138         int r;
8139
8140         if (!rdev->has_uvd)
8141                 return;
8142
8143         r = radeon_uvd_resume(rdev);
8144         if (r) {
8145                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8146                 goto error;
8147         }
8148         r = uvd_v4_2_resume(rdev);
8149         if (r) {
8150                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8151                 goto error;
8152         }
8153         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8154         if (r) {
8155                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8156                 goto error;
8157         }
8158         return;
8159
8160 error:
8161         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8162 }
8163
8164 static void cik_uvd_resume(struct radeon_device *rdev)
8165 {
8166         struct radeon_ring *ring;
8167         int r;
8168
8169         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8170                 return;
8171
8172         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8173         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8174         if (r) {
8175                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8176                 return;
8177         }
8178         r = uvd_v1_0_init(rdev);
8179         if (r) {
8180                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8181                 return;
8182         }
8183 }
8184
8185 static void cik_vce_init(struct radeon_device *rdev)
8186 {
8187         int r;
8188
8189         if (!rdev->has_vce)
8190                 return;
8191
8192         r = radeon_vce_init(rdev);
8193         if (r) {
8194                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8195                 /*
8196                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8197                  * to early fails cik_vce_start() and thus nothing happens
8198                  * there. So it is pointless to try to go through that code
8199                  * hence why we disable vce here.
8200                  */
8201                 rdev->has_vce = false;
8202                 return;
8203         }
8204         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8205         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8206         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8207         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8208 }
8209
8210 static void cik_vce_start(struct radeon_device *rdev)
8211 {
8212         int r;
8213
8214         if (!rdev->has_vce)
8215                 return;
8216
8217         r = radeon_vce_resume(rdev);
8218         if (r) {
8219                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8220                 goto error;
8221         }
8222         r = vce_v2_0_resume(rdev);
8223         if (r) {
8224                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8225                 goto error;
8226         }
8227         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8228         if (r) {
8229                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8230                 goto error;
8231         }
8232         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8233         if (r) {
8234                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8235                 goto error;
8236         }
8237         return;
8238
8239 error:
8240         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8241         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8242 }
8243
8244 static void cik_vce_resume(struct radeon_device *rdev)
8245 {
8246         struct radeon_ring *ring;
8247         int r;
8248
8249         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8250                 return;
8251
8252         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8253         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8254         if (r) {
8255                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8256                 return;
8257         }
8258         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8259         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8260         if (r) {
8261                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8262                 return;
8263         }
8264         r = vce_v1_0_init(rdev);
8265         if (r) {
8266                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8267                 return;
8268         }
8269 }
8270
8271 /**
8272  * cik_startup - program the asic to a functional state
8273  *
8274  * @rdev: radeon_device pointer
8275  *
8276  * Programs the asic to a functional state (CIK).
8277  * Called by cik_init() and cik_resume().
8278  * Returns 0 for success, error for failure.
8279  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the memory controller (VRAM layout, system aperture) */
	cik_mc_program(rdev);

	/* dGPUs need MC ucode; once dpm is enabled it handles the reload */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs use a per-family RLC save/restore register list */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on each ring: gfx, 2 compute, 2 sdma */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* pick the ring nop packet: Hawaii with the legacy firmware
	 * still uses type-2 packets, everything else uses type-3 NOPs
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* best effort; UVD/VCE failures are logged inside, not fatal */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8472
8473 /**
8474  * cik_resume - resume the asic to a functional state
8475  *
8476  * @rdev: radeon_device pointer
8477  *
8478  * Programs the asic to a functional state (CIK).
8479  * Called at resume.
8480  * Returns 0 for success, error for failure.
8481  */
8482 int cik_resume(struct radeon_device *rdev)
8483 {
8484         int r;
8485
8486         /* post card */
8487         atom_asic_init(rdev->mode_info.atom_context);
8488
8489         /* init golden registers */
8490         cik_init_golden_registers(rdev);
8491
8492         if (rdev->pm.pm_method == PM_METHOD_DPM)
8493                 radeon_pm_resume(rdev);
8494
8495         rdev->accel_working = true;
8496         r = cik_startup(rdev);
8497         if (r) {
8498                 DRM_ERROR("cik startup failed on resume\n");
8499                 rdev->accel_working = false;
8500                 return r;
8501         }
8502
8503         return r;
8504
8505 }
8506
8507 /**
8508  * cik_suspend - suspend the asic
8509  *
8510  * @rdev: radeon_device pointer
8511  *
8512  * Bring the chip into a state suitable for suspend (CIK).
8513  * Called at suspend.
8514  * Returns 0 for success.
8515  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop power management first so it no longer touches the hw */
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable powergating/clockgating before turning off irqs */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8536
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than call asic-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
8543 /**
8544  * cik_init - asic specific driver and hw init
8545  *
8546  * @rdev: radeon_device pointer
8547  *
8548  * Setup asic specific driver variables and program the hw
8549  * to a functional state (CIK).
8550  * Called at driver startup.
8551  * Returns 0 for success, errors for failure.
8552  */
8553 int cik_init(struct radeon_device *rdev)
8554 {
8555         struct radeon_ring *ring;
8556         int r;
8557
8558         /* Read BIOS */
8559         if (!radeon_get_bios(rdev)) {
8560                 if (ASIC_IS_AVIVO(rdev))
8561                         return -EINVAL;
8562         }
8563         /* Must be an ATOMBIOS */
8564         if (!rdev->is_atom_bios) {
8565                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8566                 return -EINVAL;
8567         }
8568         r = radeon_atombios_init(rdev);
8569         if (r)
8570                 return r;
8571
8572         /* Post card if necessary */
8573         if (!radeon_card_posted(rdev)) {
8574                 if (!rdev->bios) {
8575                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8576                         return -EINVAL;
8577                 }
8578                 DRM_INFO("GPU not posted. posting now...\n");
8579                 atom_asic_init(rdev->mode_info.atom_context);
8580         }
8581         /* init golden registers */
8582         cik_init_golden_registers(rdev);
8583         /* Initialize scratch registers */
8584         cik_scratch_init(rdev);
8585         /* Initialize surface registers */
8586         radeon_surface_init(rdev);
8587         /* Initialize clocks */
8588         radeon_get_clock_info(rdev->ddev);
8589
8590         /* Fence driver */
8591         r = radeon_fence_driver_init(rdev);
8592         if (r)
8593                 return r;
8594
8595         /* initialize memory controller */
8596         r = cik_mc_init(rdev);
8597         if (r)
8598                 return r;
8599         /* Memory manager */
8600         r = radeon_bo_init(rdev);
8601         if (r)
8602                 return r;
8603
8604         if (rdev->flags & RADEON_IS_IGP) {
8605                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8606                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8607                         r = cik_init_microcode(rdev);
8608                         if (r) {
8609                                 DRM_ERROR("Failed to load firmware!\n");
8610                                 return r;
8611                         }
8612                 }
8613         } else {
8614                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8615                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8616                     !rdev->mc_fw) {
8617                         r = cik_init_microcode(rdev);
8618                         if (r) {
8619                                 DRM_ERROR("Failed to load firmware!\n");
8620                                 return r;
8621                         }
8622                 }
8623         }
8624
8625         /* Initialize power management */
8626         radeon_pm_init(rdev);
8627
8628         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8629         ring->ring_obj = NULL;
8630         r600_ring_init(rdev, ring, 1024 * 1024);
8631
8632         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8633         ring->ring_obj = NULL;
8634         r600_ring_init(rdev, ring, 1024 * 1024);
8635         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8636         if (r)
8637                 return r;
8638
8639         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8640         ring->ring_obj = NULL;
8641         r600_ring_init(rdev, ring, 1024 * 1024);
8642         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8643         if (r)
8644                 return r;
8645
8646         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8647         ring->ring_obj = NULL;
8648         r600_ring_init(rdev, ring, 256 * 1024);
8649
8650         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8651         ring->ring_obj = NULL;
8652         r600_ring_init(rdev, ring, 256 * 1024);
8653
8654         cik_uvd_init(rdev);
8655         cik_vce_init(rdev);
8656
8657         rdev->ih.ring_obj = NULL;
8658         r600_ih_ring_init(rdev, 64 * 1024);
8659
8660         r = r600_pcie_gart_init(rdev);
8661         if (r)
8662                 return r;
8663
8664         rdev->accel_working = true;
8665         r = cik_startup(rdev);
8666         if (r) {
8667                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8668                 cik_cp_fini(rdev);
8669                 cik_sdma_fini(rdev);
8670                 cik_irq_fini(rdev);
8671                 sumo_rlc_fini(rdev);
8672                 cik_mec_fini(rdev);
8673                 radeon_wb_fini(rdev);
8674                 radeon_ib_pool_fini(rdev);
8675                 radeon_vm_manager_fini(rdev);
8676                 radeon_irq_kms_fini(rdev);
8677                 cik_pcie_gart_fini(rdev);
8678                 rdev->accel_working = false;
8679         }
8680
8681         /* Don't start up if the MC ucode is missing.
8682          * The default clocks and voltages before the MC ucode
8683          * is loaded are not suffient for advanced operations.
8684          */
8685         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8686                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8687                 return -EINVAL;
8688         }
8689
8690         return 0;
8691 }
8692
8693 /**
8694  * cik_fini - asic specific driver and hw fini
8695  *
8696  * @rdev: radeon_device pointer
8697  *
8698  * Tear down the asic specific driver variables and program the hw
8699  * to an idle state (CIK).
8700  * Called at driver unload.
8701  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down roughly in reverse order of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8728
8729 void dce8_program_fmt(struct drm_encoder *encoder)
8730 {
8731         struct drm_device *dev = encoder->dev;
8732         struct radeon_device *rdev = dev->dev_private;
8733         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8734         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8735         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8736         int bpc = 0;
8737         u32 tmp = 0;
8738         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8739
8740         if (connector) {
8741                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8742                 bpc = radeon_get_monitor_bpc(connector);
8743                 dither = radeon_connector->dither;
8744         }
8745
8746         /* LVDS/eDP FMT is set up by atom */
8747         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8748                 return;
8749
8750         /* not needed for analog */
8751         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8752             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8753                 return;
8754
8755         if (bpc == 0)
8756                 return;
8757
8758         switch (bpc) {
8759         case 6:
8760                 if (dither == RADEON_FMT_DITHER_ENABLE)
8761                         /* XXX sort out optimal dither settings */
8762                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8763                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8764                 else
8765                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8766                 break;
8767         case 8:
8768                 if (dither == RADEON_FMT_DITHER_ENABLE)
8769                         /* XXX sort out optimal dither settings */
8770                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8771                                 FMT_RGB_RANDOM_ENABLE |
8772                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8773                 else
8774                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8775                 break;
8776         case 10:
8777                 if (dither == RADEON_FMT_DITHER_ENABLE)
8778                         /* XXX sort out optimal dither settings */
8779                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8780                                 FMT_RGB_RANDOM_ENABLE |
8781                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8782                 else
8783                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8784                 break;
8785         default:
8786                 /* not needed */
8787                 break;
8788         }
8789
8790         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8791 }
8792
8793 /* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB_MEMORY_CONFIG partitioning mode,
		 * buffer_alloc the number of DMIF buffers for this pipe
		 */
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal LB config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait for the hardware to acknowledge the buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* translate the partition config back to a size in pixels */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
8867
/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	/* NOOFCHAN is a hardware encoding, not a literal channel count */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}
8903
/* Inputs to the DCE8 display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8919
8920 /**
8921  * dce8_dram_bandwidth - get the dram bandwidth
8922  *
8923  * @wm: watermark calculation data
8924  *
8925  * Calculate the raw dram bandwidth (CIK).
8926  * Used for display watermark bandwidth calculations
8927  * Returns the dram bandwidth in MBytes/s
8928  */
8929 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8930 {
8931         /* Calculate raw DRAM Bandwidth */
8932         fixed20_12 dram_efficiency; /* 0.7 */
8933         fixed20_12 yclk, dram_channels, bandwidth;
8934         fixed20_12 a;
8935
8936         a.full = dfixed_const(1000);
8937         yclk.full = dfixed_const(wm->yclk);
8938         yclk.full = dfixed_div(yclk, a);
8939         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8940         a.full = dfixed_const(10);
8941         dram_efficiency.full = dfixed_const(7);
8942         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8943         bandwidth.full = dfixed_mul(dram_channels, yclk);
8944         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8945
8946         return dfixed_trunc(bandwidth);
8947 }
8948
8949 /**
8950  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8951  *
8952  * @wm: watermark calculation data
8953  *
8954  * Calculate the dram bandwidth used for display (CIK).
8955  * Used for display watermark bandwidth calculations
8956  * Returns the dram bandwidth for display in MBytes/s
8957  */
8958 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8959 {
8960         /* Calculate DRAM Bandwidth and the part allocated to display. */
8961         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8962         fixed20_12 yclk, dram_channels, bandwidth;
8963         fixed20_12 a;
8964
8965         a.full = dfixed_const(1000);
8966         yclk.full = dfixed_const(wm->yclk);
8967         yclk.full = dfixed_div(yclk, a);
8968         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8969         a.full = dfixed_const(10);
8970         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8971         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8972         bandwidth.full = dfixed_mul(dram_channels, yclk);
8973         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8974
8975         return dfixed_trunc(bandwidth);
8976 }
8977
8978 /**
8979  * dce8_data_return_bandwidth - get the data return bandwidth
8980  *
8981  * @wm: watermark calculation data
8982  *
8983  * Calculate the data return bandwidth used for display (CIK).
8984  * Used for display watermark bandwidth calculations
8985  * Returns the data return bandwidth in MBytes/s
8986  */
8987 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8988 {
8989         /* Calculate the display Data return Bandwidth */
8990         fixed20_12 return_efficiency; /* 0.8 */
8991         fixed20_12 sclk, bandwidth;
8992         fixed20_12 a;
8993
8994         a.full = dfixed_const(1000);
8995         sclk.full = dfixed_const(wm->sclk);
8996         sclk.full = dfixed_div(sclk, a);
8997         a.full = dfixed_const(10);
8998         return_efficiency.full = dfixed_const(8);
8999         return_efficiency.full = dfixed_div(return_efficiency, a);
9000         a.full = dfixed_const(32);
9001         bandwidth.full = dfixed_mul(a, sclk);
9002         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9003
9004         return dfixed_trunc(bandwidth);
9005 }
9006
9007 /**
9008  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9009  *
9010  * @wm: watermark calculation data
9011  *
9012  * Calculate the dmif bandwidth used for display (CIK).
9013  * Used for display watermark bandwidth calculations
9014  * Returns the dmif bandwidth in MBytes/s
9015  */
9016 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9017 {
9018         /* Calculate the DMIF Request Bandwidth */
9019         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9020         fixed20_12 disp_clk, bandwidth;
9021         fixed20_12 a, b;
9022
9023         a.full = dfixed_const(1000);
9024         disp_clk.full = dfixed_const(wm->disp_clk);
9025         disp_clk.full = dfixed_div(disp_clk, a);
9026         a.full = dfixed_const(32);
9027         b.full = dfixed_mul(a, disp_clk);
9028
9029         a.full = dfixed_const(10);
9030         disp_clk_request_efficiency.full = dfixed_const(8);
9031         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9032
9033         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9034
9035         return dfixed_trunc(bandwidth);
9036 }
9037
9038 /**
9039  * dce8_available_bandwidth - get the min available bandwidth
9040  *
9041  * @wm: watermark calculation data
9042  *
9043  * Calculate the min available bandwidth used for display (CIK).
9044  * Used for display watermark bandwidth calculations
9045  * Returns the min available bandwidth in MBytes/s
9046  */
9047 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9048 {
9049         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9050         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9051         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9052         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9053
9054         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9055 }
9056
9057 /**
9058  * dce8_average_bandwidth - get the average available bandwidth
9059  *
9060  * @wm: watermark calculation data
9061  *
9062  * Calculate the average available bandwidth used for display (CIK).
9063  * Used for display watermark bandwidth calculations
9064  * Returns the average available bandwidth in MBytes/s
9065  */
9066 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9067 {
9068         /* Calculate the display mode Average Bandwidth
9069          * DisplayMode should contain the source and destination dimensions,
9070          * timing, etc.
9071          */
9072         fixed20_12 bpp;
9073         fixed20_12 line_time;
9074         fixed20_12 src_width;
9075         fixed20_12 bandwidth;
9076         fixed20_12 a;
9077
9078         a.full = dfixed_const(1000);
9079         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9080         line_time.full = dfixed_div(line_time, a);
9081         bpp.full = dfixed_const(wm->bytes_per_pixel);
9082         src_width.full = dfixed_const(wm->src_width);
9083         bandwidth.full = dfixed_mul(src_width, bpp);
9084         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9085         bandwidth.full = dfixed_div(bandwidth, line_time);
9086
9087         return dfixed_trunc(bandwidth);
9088 }
9089
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* return time for a worst-case chunk / a cursor line pair */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> no latency to hide */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling/interlacing determines how many source lines feed
	 * one destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill bandwidth: bounded by this head's share of the
	 * available bandwidth, the dmif capacity, and the display's own
	 * consumption rate
	 */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time needed to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if the line cannot be refilled within active time */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9148
9149 /**
9150  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9151  * average and available dram bandwidth
9152  *
9153  * @wm: watermark calculation data
9154  *
9155  * Check if the display average bandwidth fits in the display
9156  * dram bandwidth (CIK).
9157  * Used for display watermark bandwidth calculations
9158  * Returns true if the display fits, false if not.
9159  */
9160 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9161 {
9162         if (dce8_average_bandwidth(wm) <=
9163             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9164                 return true;
9165         else
9166                 return false;
9167 }
9168
9169 /**
9170  * dce8_average_bandwidth_vs_available_bandwidth - check
9171  * average and available bandwidth
9172  *
9173  * @wm: watermark calculation data
9174  *
9175  * Check if the display average bandwidth fits in the display
9176  * available bandwidth (CIK).
9177  * Used for display watermark bandwidth calculations
9178  * Returns true if the display fits, false if not.
9179  */
9180 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9181 {
9182         if (dce8_average_bandwidth(wm) <=
9183             (dce8_available_bandwidth(wm) / wm->num_heads))
9184                 return true;
9185         else
9186                 return false;
9187 }
9188
9189 /**
9190  * dce8_check_latency_hiding - check latency hiding
9191  *
9192  * @wm: watermark calculation data
9193  *
9194  * Check latency hiding (CIK).
9195  * Used for display watermark bandwidth calculations
9196  * Returns true if the display fits, false if not.
9197  */
9198 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9199 {
9200         u32 lb_partitions = wm->lb_size / wm->src_width;
9201         u32 line_time = wm->active_time + wm->blank_time;
9202         u32 latency_tolerant_lines;
9203         u32 latency_hiding;
9204         fixed20_12 a;
9205
9206         a.full = dfixed_const(1);
9207         if (wm->vsc.full > a.full)
9208                 latency_tolerant_lines = 1;
9209         else {
9210                 if (lb_partitions <= (wm->vtaps + 1))
9211                         latency_tolerant_lines = 1;
9212                 else
9213                         latency_tolerant_lines = 2;
9214         }
9215
9216         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9217
9218         if (dce8_latency_watermark(wm) <= latency_hiding)
9219                 return true;
9220         else
9221                 return false;
9222 }
9223
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active/line time in ns, derived from the pixel clock */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		/* the hardware watermark fields are 16 bits wide */
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9362
9363 /**
9364  * dce8_bandwidth_update - program display watermarks
9365  *
9366  * @rdev: radeon_device pointer
9367  *
9368  * Calculate and program the display watermarks and line
9369  * buffer allocation (CIK).
9370  */
9371 void dce8_bandwidth_update(struct radeon_device *rdev)
9372 {
9373         struct drm_display_mode *mode = NULL;
9374         u32 num_heads = 0, lb_size;
9375         int i;
9376
9377         if (!rdev->mode_info.mode_config_initialized)
9378                 return;
9379
9380         radeon_update_display_priority(rdev);
9381
9382         for (i = 0; i < rdev->num_crtc; i++) {
9383                 if (rdev->mode_info.crtcs[i]->base.enabled)
9384                         num_heads++;
9385         }
9386         for (i = 0; i < rdev->num_crtc; i++) {
9387                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9388                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9389                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9390         }
9391 }
9392
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* the mutex serializes the latch-then-read sequence below */
	mutex_lock(&rdev->gpu_clock_mutex);
	/* writing the capture register latches the counter into LSB/MSB */
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
9412
/**
 * cik_set_uvd_clock - program one UVD clock via its divider register
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: SMC control register holding the clock divider
 * @status_reg: SMC status register with the clock-stable bit
 *
 * Looks up the post divider for @clock via the atom tables, programs
 * it into @cntl_reg, and waits for @status_reg to report the clock
 * as stable.
 * Returns 0 on success, -ETIMEDOUT if the clock does not stabilize,
 * or a negative error code from the divider lookup.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* replace only the divider field; preserve the other control bits */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 100 * 10ms for the clock to report stable */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9440
9441 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9442 {
9443         int r = 0;
9444
9445         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9446         if (r)
9447                 return r;
9448
9449         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9450         return r;
9451 }
9452
/**
 * cik_set_vce_clocks - program the VCE ECLK
 *
 * @rdev: radeon_device pointer
 * @evclk: requested EVCLK frequency (not programmed here)
 * @ecclk: requested ECLK frequency
 *
 * Looks up the post divider for @ecclk, waits for the current ECLK
 * to be stable, programs the new divider, and waits for the clock
 * to stabilize again.
 * Returns 0 on success, -ETIMEDOUT if the clock does not stabilize,
 * or a negative error code from the divider lookup.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* wait for the current clock to be stable before reprogramming */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* replace only the divider field; preserve the other control bits */
	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait for the new divider to take effect */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9487
9488 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9489 {
9490         struct pci_dev *root = rdev->pdev->bus->self;
9491         enum pci_bus_speed speed_cap;
9492         u32 speed_cntl, current_data_rate;
9493         int i;
9494         u16 tmp16;
9495
9496         if (pci_is_root_bus(rdev->pdev->bus))
9497                 return;
9498
9499         if (radeon_pcie_gen2 == 0)
9500                 return;
9501
9502         if (rdev->flags & RADEON_IS_IGP)
9503                 return;
9504
9505         if (!(rdev->flags & RADEON_IS_PCIE))
9506                 return;
9507
9508         speed_cap = pcie_get_speed_cap(root);
9509         if (speed_cap == PCI_SPEED_UNKNOWN)
9510                 return;
9511
9512         if ((speed_cap != PCIE_SPEED_8_0GT) &&
9513             (speed_cap != PCIE_SPEED_5_0GT))
9514                 return;
9515
9516         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9517         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9518                 LC_CURRENT_DATA_RATE_SHIFT;
9519         if (speed_cap == PCIE_SPEED_8_0GT) {
9520                 if (current_data_rate == 2) {
9521                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9522                         return;
9523                 }
9524                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9525         } else if (speed_cap == PCIE_SPEED_5_0GT) {
9526                 if (current_data_rate == 1) {
9527                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9528                         return;
9529                 }
9530                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9531         }
9532
9533         if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9534                 return;
9535
9536         if (speed_cap == PCIE_SPEED_8_0GT) {
9537                 /* re-try equalization if gen3 is not already enabled */
9538                 if (current_data_rate != 2) {
9539                         u16 bridge_cfg, gpu_cfg;
9540                         u16 bridge_cfg2, gpu_cfg2;
9541                         u32 max_lw, current_lw, tmp;
9542
9543                         pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9544                                                   &bridge_cfg);
9545                         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
9546                                                   &gpu_cfg);
9547
9548                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9549                         pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
9550
9551                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9552                         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
9553                                                    tmp16);
9554
9555                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9556                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9557                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9558
9559                         if (current_lw < max_lw) {
9560                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9561                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9562                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9563                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9564                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9565                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9566                                 }
9567                         }
9568
9569                         for (i = 0; i < 10; i++) {
9570                                 /* check status */
9571                                 pcie_capability_read_word(rdev->pdev,
9572                                                           PCI_EXP_DEVSTA,
9573                                                           &tmp16);
9574                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9575                                         break;
9576
9577                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9578                                                           &bridge_cfg);
9579                                 pcie_capability_read_word(rdev->pdev,
9580                                                           PCI_EXP_LNKCTL,
9581                                                           &gpu_cfg);
9582
9583                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9584                                                           &bridge_cfg2);
9585                                 pcie_capability_read_word(rdev->pdev,
9586                                                           PCI_EXP_LNKCTL2,
9587                                                           &gpu_cfg2);
9588
9589                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9590                                 tmp |= LC_SET_QUIESCE;
9591                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9592
9593                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9594                                 tmp |= LC_REDO_EQ;
9595                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9596
9597                                 msleep(100);
9598
9599                                 /* linkctl */
9600                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9601                                                           &tmp16);
9602                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9603                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9604                                 pcie_capability_write_word(root, PCI_EXP_LNKCTL,
9605                                                            tmp16);
9606
9607                                 pcie_capability_read_word(rdev->pdev,
9608                                                           PCI_EXP_LNKCTL,
9609                                                           &tmp16);
9610                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9611                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9612                                 pcie_capability_write_word(rdev->pdev,
9613                                                            PCI_EXP_LNKCTL,
9614                                                            tmp16);
9615
9616                                 /* linkctl2 */
9617                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9618                                                           &tmp16);
9619                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9620                                            PCI_EXP_LNKCTL2_TX_MARGIN);
9621                                 tmp16 |= (bridge_cfg2 &
9622                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
9623                                            PCI_EXP_LNKCTL2_TX_MARGIN));
9624                                 pcie_capability_write_word(root,
9625                                                            PCI_EXP_LNKCTL2,
9626                                                            tmp16);
9627
9628                                 pcie_capability_read_word(rdev->pdev,
9629                                                           PCI_EXP_LNKCTL2,
9630                                                           &tmp16);
9631                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9632                                            PCI_EXP_LNKCTL2_TX_MARGIN);
9633                                 tmp16 |= (gpu_cfg2 &
9634                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
9635                                            PCI_EXP_LNKCTL2_TX_MARGIN));
9636                                 pcie_capability_write_word(rdev->pdev,
9637                                                            PCI_EXP_LNKCTL2,
9638                                                            tmp16);
9639
9640                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9641                                 tmp &= ~LC_SET_QUIESCE;
9642                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9643                         }
9644                 }
9645         }
9646
9647         /* set the link speed */
9648         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9649         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9650         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9651
9652         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9653         tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9654         if (speed_cap == PCIE_SPEED_8_0GT)
9655                 tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9656         else if (speed_cap == PCIE_SPEED_5_0GT)
9657                 tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9658         else
9659                 tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9660         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9661
9662         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9663         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9664         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9665
9666         for (i = 0; i < rdev->usec_timeout; i++) {
9667                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9668                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9669                         break;
9670                 udelay(1);
9671         }
9672 }
9673
/**
 * cik_program_aspm - configure ASPM (Active State Power Management) for CIK
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE port and SMC registers to enable ASPM link power
 * states (L0s/L1).  The sequence is a series of read-modify-write
 * operations; each register is only written back when the value actually
 * changed (the recurring "if (orig != data)" guard avoids redundant MMIO
 * writes).  Statement order follows the hardware programming sequence and
 * must not be rearranged.
 *
 * The four "disable_*" locals are hard-wired to false here, i.e. all
 * features are attempted; they exist as compile-time knobs mirroring the
 * equivalent SI code.  No return value; silently does nothing for IGPs or
 * non-PCIE parts, or when the radeon.aspm module parameter is 0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon.aspm=0 disables ASPM handling entirely */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	/* ASPM is a PCIE link feature; nothing to do on non-PCIE buses */
	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value with 0x24
	 * (N_FTS = fast training sequence count for L0s exit —
	 *  NOTE(review): inferred from the register name, confirm against
	 *  the register spec) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build up the L0s/L1 inactivity timer configuration; the write to
	 * PCIE_LC_CNTL happens in whichever branch of the if below applies */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set L1 inactivity timer and re-allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* program the PLL power-down state for both PIF
			 * (PCIE interface) blocks, lanes 0 and 1 — the four
			 * register writes below are the same pattern applied
			 * to PB0/PB1 x PWRDOWN_0/PWRDOWN_1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is only usable when an upstream bridge
			 * exists and advertises clock power management in
			 * its link capabilities */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor, deep-sleep,
				 * zclk and MPLL bypass clock sources so the
				 * chip can run while the reference clock is
				 * gated — NOTE(review): exact clock-mux
				 * meanings inferred from register names;
				 * values match the SI/CI power code */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the PCIE block */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the received N_FTS field is saturated and the link is
		 * reversed in both directions, back out the L0s inactivity
		 * timer set earlier (L0s apparently unreliable in that
		 * configuration — NOTE(review): rationale inferred, matches
		 * the equivalent SI workaround) */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}