drm/msm: devcoredump iommu fault support
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
19 #define GPU_PAS_ID 13
20
21 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
22                 bool sync)
23 {
24         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
25         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
26         uint32_t wptr;
27         unsigned long flags;
28
29         /*
30          * Most flush operations need to issue a WHERE_AM_I opcode to sync up
31          * the rptr shadow
32          */
33         if (a5xx_gpu->has_whereami && sync) {
34                 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
35                 OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
36                 OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
37         }
38
39         spin_lock_irqsave(&ring->preempt_lock, flags);
40
41         /* Copy the shadow to the actual register */
42         ring->cur = ring->next;
43
44         /* Make sure to wrap wptr if we need to */
45         wptr = get_wptr(ring);
46
47         spin_unlock_irqrestore(&ring->preempt_lock, flags);
48
49         /* Make sure everything is posted before making a decision */
50         mb();
51
52         /* Update HW if this is the current ring and we are not in preempt */
53         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
54                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
55 }
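/*
 * Typical calling pattern (an illustrative sketch mirroring a5xx_me_init()
 * below, not new driver code): emit the packets, then publish the new write
 * pointer with a5xx_flush(), passing sync=true so the CP_WHERE_AM_I opcode
 * keeps the rptr shadow current:
 *
 *	OUT_PKT7(ring, CP_ME_INIT, 8);
 *	OUT_RING(ring, ...);
 *	a5xx_flush(gpu, ring, true);
 *
 * sync=false is only used where the shadow update can be skipped, e.g. right
 * after a CP_CONTEXT_SWITCH_YIELD in a5xx_submit().
 */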
56
57 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
58 {
59         struct msm_drm_private *priv = gpu->dev->dev_private;
60         struct msm_ringbuffer *ring = submit->ring;
61         struct msm_gem_object *obj;
62         uint32_t *ptr, dwords;
63         unsigned int i, j;
64
65         for (i = 0; i < submit->nr_cmds; i++) {
66                 switch (submit->cmd[i].type) {
67                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
68                         break;
69                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
70                         if (priv->lastctx == submit->queue->ctx)
71                                 break;
72                         fallthrough;
73                 case MSM_SUBMIT_CMD_BUF:
74                         /* copy commands into RB: */
75                         obj = submit->bos[submit->cmd[i].idx].obj;
76                         dwords = submit->cmd[i].size;
77
78                         ptr = msm_gem_get_vaddr(&obj->base);
79
80                         /* _get_vaddr() shouldn't fail at this point,
81                          * since we've already mapped it once in
82                          * submit_reloc()
83                          */
84                         if (WARN_ON(!ptr))
85                                 return;
86
87                         for (j = 0; j < dwords; j++) {
88                                 /* normally the OUT_PKTn() would wait
89                                  * for space for the packet.  But since
90                                  * we just OUT_RING() the whole thing,
91                                  * need to call adreno_wait_ring()
92                                  * ourself:
93                                  */
94                                 adreno_wait_ring(ring, 1);
95                                 OUT_RING(ring, ptr[j]);
96                         }
97
98                         msm_gem_put_vaddr(&obj->base);
99
100                         break;
101                 }
102         }
103
104         a5xx_flush(gpu, ring, true);
105         a5xx_preempt_trigger(gpu);
106
107         /* we might not necessarily have a cmd from userspace to
108          * trigger an event to know that submit has completed, so
109          * do this manually:
110          */
111         a5xx_idle(gpu, ring);
112         ring->memptrs->fence = submit->seqno;
113         msm_gpu_retire(gpu);
114 }
115
116 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
117 {
118         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
119         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
120         struct msm_drm_private *priv = gpu->dev->dev_private;
121         struct msm_ringbuffer *ring = submit->ring;
122         unsigned int i, ibs = 0;
123
124         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
125                 priv->lastctx = NULL;
126                 a5xx_submit_in_rb(gpu, submit);
127                 return;
128         }
129
130         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
131         OUT_RING(ring, 0x02);
132
133         /* Turn off protected mode to write to special registers */
134         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135         OUT_RING(ring, 0);
136
137         /* Set the save preemption record for the ring/command */
138         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
139         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
140         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
141
142         /* Turn back on protected mode */
143         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
144         OUT_RING(ring, 1);
145
146         /* Enable local preemption for fine-grained preemption */
147         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
148         OUT_RING(ring, 0x02);
149
150         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
151         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
152         OUT_RING(ring, 0x02);
153
154         /* Submit the commands */
155         for (i = 0; i < submit->nr_cmds; i++) {
156                 switch (submit->cmd[i].type) {
157                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
158                         break;
159                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
160                         if (priv->lastctx == submit->queue->ctx)
161                                 break;
162                         fallthrough;
163                 case MSM_SUBMIT_CMD_BUF:
164                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
165                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
166                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
167                         OUT_RING(ring, submit->cmd[i].size);
168                         ibs++;
169                         break;
170                 }
171         }
172
173         /*
174          * Write the render mode to NULL (0) to indicate to the CP that the IBs
175          * are done rendering - otherwise a lucky preemption would start
176          * replaying from the last checkpoint
177          */
178         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
179         OUT_RING(ring, 0);
180         OUT_RING(ring, 0);
181         OUT_RING(ring, 0);
182         OUT_RING(ring, 0);
183         OUT_RING(ring, 0);
184
185         /* Turn off IB level preemptions */
186         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
187         OUT_RING(ring, 0x01);
188
189         /* Write the fence to the scratch register */
190         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
191         OUT_RING(ring, submit->seqno);
192
193         /*
194          * Execute a CACHE_FLUSH_TS event. This will ensure that the
195          * timestamp is written to the memory and then triggers the interrupt
196          */
197         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
198         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
199                 CP_EVENT_WRITE_0_IRQ);
200         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
201         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
202         OUT_RING(ring, submit->seqno);
203
204         /* Yield the floor on command completion */
205         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
206         /*
207          * If dword[2:1] are non zero, they specify an address for the CP to
208          * write the value of dword[3] to on preemption complete. Write 0 to
209          * skip the write
210          */
211         OUT_RING(ring, 0x00);
212         OUT_RING(ring, 0x00);
213         /* Data value - not used if the address above is 0 */
214         OUT_RING(ring, 0x01);
215         /* Set bit 0 to trigger an interrupt on preempt complete */
216         OUT_RING(ring, 0x01);
217
218         /* A WHERE_AM_I packet is not needed after a YIELD */
219         a5xx_flush(gpu, ring, false);
220
221         /* Check to see if we need to start preemption */
222         a5xx_preempt_trigger(gpu);
223 }
224
225 static const struct adreno_five_hwcg_regs {
226         u32 offset;
227         u32 value;
228 } a5xx_hwcg[] = {
229         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
230         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
231         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
232         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
233         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
234         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
235         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
236         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
237         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
238         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
239         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
240         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
241         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
242         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
243         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
244         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
245         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
246         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
247         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
248         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
249         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
250         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
251         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
252         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
253         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
254         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
255         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
256         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
257         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
258         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
259         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
260         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
261         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
262         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
263         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
264         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
265         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
266         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
267         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
268         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
269         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
270         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
271         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
272         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
273         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
274         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
275         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
276         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
277         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
278         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
279         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
280         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
281         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
282         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
283         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
284         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
285         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
286         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
287         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
288         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
289         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
290         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
291         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
292         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
293         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
294         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
295         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
296         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
297         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
298         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
299         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
300         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
301         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
302         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
303         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
304         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
305         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
306         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
307         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
308         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
309         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
310         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
311         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
312         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
313         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
314         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
315         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
316         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
317         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
318         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
319         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
320         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
321 }, a50x_hwcg[] = {
322         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
323         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
324         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
325         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
326         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
327         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
328         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
329         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
330         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
331         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
332         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
333         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
334         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
335         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
336         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
337         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
338         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
339         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
340         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
341         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
342         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
343         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
344         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
345         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
346         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
347         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
348         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
349         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
350         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
351         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
352         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
353         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
354         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
355         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
356         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
357         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
358         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
359         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
360 }, a512_hwcg[] = {
361         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
362         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
363         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
364         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
365         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
366         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
367         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
368         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
369         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
370         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
371         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
372         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
373         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
374         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
375         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
376         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
377         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
378         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
379         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
380         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
381         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
382         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
383         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
384         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
385         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
386         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
387         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
388         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
389         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
390         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
391         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
392         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
393         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
394         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
395         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
396         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
397         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
398         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
399         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
400         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
401         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
402         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
403         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
404         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
405         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
406         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
407         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
408         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
409         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
410         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
411         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
412         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
413         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
414         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
415         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
416         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
417 };
418
419 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
420 {
421         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
422         const struct adreno_five_hwcg_regs *regs;
423         unsigned int i, sz;
424
425         if (adreno_is_a508(adreno_gpu)) {
426                 regs = a50x_hwcg;
427                 sz = ARRAY_SIZE(a50x_hwcg);
428         } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
429                 regs = a512_hwcg;
430                 sz = ARRAY_SIZE(a512_hwcg);
431         } else {
432                 regs = a5xx_hwcg;
433                 sz = ARRAY_SIZE(a5xx_hwcg);
434         }
435
436         for (i = 0; i < sz; i++)
437                 gpu_write(gpu, regs[i].offset,
438                           state ? regs[i].value : 0);
439
440         if (adreno_is_a540(adreno_gpu)) {
441                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
442                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
443         }
444
445         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
446         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
447 }
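/*
 * Usage note: a5xx_set_hwcg(gpu, true) is called from a5xx_hw_init() below to
 * enable hardware clock gating, and again from a5xx_pm_resume() after the
 * sp_input_clk halt/unhalt sequence on the non-GPMU parts
 * (a508/a509/a510/a512).
 */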
448
449 static int a5xx_me_init(struct msm_gpu *gpu)
450 {
451         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
452         struct msm_ringbuffer *ring = gpu->rb[0];
453
454         OUT_PKT7(ring, CP_ME_INIT, 8);
455
456         OUT_RING(ring, 0x0000002F);
457
458         /* Enable multiple hardware contexts */
459         OUT_RING(ring, 0x00000003);
460
461         /* Enable error detection */
462         OUT_RING(ring, 0x20000000);
463
464         /* Don't enable header dump */
465         OUT_RING(ring, 0x00000000);
466         OUT_RING(ring, 0x00000000);
467
468         /* Specify workarounds for various microcode issues */
469         if (adreno_is_a530(adreno_gpu)) {
470                 /* Workaround for token end syncs
471                  * Force a WFI after every direct-render 3D mode draw and every
472                  * 2D mode 3 draw
473                  */
474                 OUT_RING(ring, 0x0000000B);
475         } else if (adreno_is_a510(adreno_gpu)) {
476                 /* Workaround for token and syncs */
477                 OUT_RING(ring, 0x00000001);
478         } else {
479                 /* No workarounds enabled */
480                 OUT_RING(ring, 0x00000000);
481         }
482
483         OUT_RING(ring, 0x00000000);
484         OUT_RING(ring, 0x00000000);
485
486         a5xx_flush(gpu, ring, true);
487         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
488 }
489
490 static int a5xx_preempt_start(struct msm_gpu *gpu)
491 {
492         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
493         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
494         struct msm_ringbuffer *ring = gpu->rb[0];
495
496         if (gpu->nr_rings == 1)
497                 return 0;
498
499         /* Turn off protected mode to write to special registers */
500         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
501         OUT_RING(ring, 0);
502
503         /* Set the save preemption record for the ring/command */
504         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
505         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
506         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
507
508         /* Turn back on protected mode */
509         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
510         OUT_RING(ring, 1);
511
512         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
513         OUT_RING(ring, 0x00);
514
515         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
516         OUT_RING(ring, 0x01);
517
518         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
519         OUT_RING(ring, 0x01);
520
521         /* Yield the floor on command completion */
522         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
523         OUT_RING(ring, 0x00);
524         OUT_RING(ring, 0x00);
525         OUT_RING(ring, 0x01);
526         OUT_RING(ring, 0x01);
527
528         /* The WHERE_AM_I packet is not needed after a YIELD is issued */
529         a5xx_flush(gpu, ring, false);
530
531         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
532 }
533
534 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
535                 struct drm_gem_object *obj)
536 {
537         u32 *buf = msm_gem_get_vaddr(obj);
538
539         if (IS_ERR(buf))
540                 return;
541
542         /*
543          * If the lowest nibble is 0xa that is an indication that this microcode
544          * has been patched. The actual version is in dword [3] but we only care
545          * about the patchlevel which is the lowest nibble of dword [3]
546          */
547         if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
548                 a5xx_gpu->has_whereami = true;
549
550         msm_gem_put_vaddr(obj);
551 }
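/*
 * Worked example (hypothetical firmware words, for illustration only): with
 * buf[0] == 0x0000016a the lowest nibble is 0xa, so the image is a patched
 * one, and with buf[2] == 0x00000021 the patchlevel nibble is 0x1 >= 1, so
 * has_whereami is set and a5xx_hw_init() can use the CP_WHERE_AM_I based
 * rptr shadow.
 */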
552
553 static int a5xx_ucode_init(struct msm_gpu *gpu)
554 {
555         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
556         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
557         int ret;
558
559         if (!a5xx_gpu->pm4_bo) {
560                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
561                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
562
563
564                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
565                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
566                         a5xx_gpu->pm4_bo = NULL;
567                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
568                                 ret);
569                         return ret;
570                 }
571
572                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
573         }
574
575         if (!a5xx_gpu->pfp_bo) {
576                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
577                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
578
579                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
580                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
581                         a5xx_gpu->pfp_bo = NULL;
582                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
583                                 ret);
584                         return ret;
585                 }
586
587                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
588                 a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
589         }
590
591         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
592                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
593
594         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
595                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
596
597         return 0;
598 }
599
600 #define SCM_GPU_ZAP_SHADER_RESUME 0
601
602 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
603 {
604         int ret;
605
606         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
607         if (ret)
608                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
609                         gpu->name, ret);
610
611         return ret;
612 }
613
614 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
615 {
616         static bool loaded;
617         int ret;
618
619         /*
620          * If the zap shader is already loaded into memory we just need to kick
621          * the remote processor to reinitialize it
622          */
623         if (loaded)
624                 return a5xx_zap_shader_resume(gpu);
625
626         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
627
628         loaded = !ret;
629         return ret;
630 }
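/*
 * Called from a5xx_hw_init(): on success the CP is asked to leave secure mode
 * with CP_SET_SECURE_MODE; on -ENODEV (no zap shader described for this
 * device) the driver falls back to clearing RBBM_SECVID_TRUST_CNTL directly.
 * Any other error aborts the hw_init.
 */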
631
632 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
633           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
634           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
635           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
636           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
637           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
638           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
639           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
640           A5XX_RBBM_INT_0_MASK_CP_SW | \
641           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
642           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
643           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
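/*
 * A5XX_INT_MASK is written to REG_A5XX_RBBM_INT_0_MASK in a5xx_hw_init() and
 * the corresponding status bits are dispatched in a5xx_irq() below.
 */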
644
645 static int a5xx_hw_init(struct msm_gpu *gpu)
646 {
647         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
648         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
649         u32 regbit;
650         int ret;
651
652         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
653
654         if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
655             adreno_is_a540(adreno_gpu))
656                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
657
658         /* Make all blocks contribute to the GPU BUSY perf counter */
659         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
660
661         /* Enable RBBM error reporting bits */
662         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
663
664         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
665                 /*
666                  * Mask out the activity signals from RB1-3 to avoid false
667                  * positives
668                  */
669
670                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
671                         0xF0000000);
672                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
673                         0xFFFFFFFF);
674                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
675                         0xFFFFFFFF);
676                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
677                         0xFFFFFFFF);
678                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
679                         0xFFFFFFFF);
680                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
681                         0xFFFFFFFF);
682                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
683                         0xFFFFFFFF);
684                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
685                         0xFFFFFFFF);
686         }
687
688         /* Enable fault detection */
689         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
690                 (1 << 30) | 0xFFFF);
691
692         /* Turn on performance counters */
693         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
694
695         /* Select CP0 to always count cycles */
696         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
697
698         /* Select RBBM0 to countable 6 to get the busy status for devfreq */
699         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
700
701         /* Increase VFD cache access so LRZ and other data gets evicted less */
702         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
703
704         /* Disable L2 bypass in the UCHE */
705         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
706         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
707         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
708         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
709
710         /* Set the GMEM VA range (0 to gpu->gmem) */
711         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
712         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
713         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
714                 0x00100000 + adreno_gpu->gmem - 1);
715         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
716
717         if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) {
718                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
719                 if (adreno_is_a508(adreno_gpu))
720                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
721                 else
722                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
723                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
724                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
725         } else {
726                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
727                 if (adreno_is_a530(adreno_gpu))
728                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
729                 else
730                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
731                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
732                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
733         }
734
735         if (adreno_is_a508(adreno_gpu))
736                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
737                           (0x100 << 11 | 0x100 << 22));
738         else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
739                  adreno_is_a512(adreno_gpu))
740                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
741                           (0x200 << 11 | 0x200 << 22));
742         else
743                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
744                           (0x400 << 11 | 0x300 << 22));
745
746         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
747                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
748
749         /*
750          * Disable the RB sampler datapath DP2 clock gating optimization
751          * for 1-SP GPUs, as it is enabled by default.
752          */
753         if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
754             adreno_is_a512(adreno_gpu))
755                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
756
757         /* Disable UCHE global filter as SP can invalidate/flush independently */
758         gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
759
760         /* Enable USE_RETENTION_FLOPS */
761         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
762
763         /* Enable ME/PFP split notification */
764         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
765
766         /*
767          *  On A5xx, the CCU can send the context_done event for a particular
768          *  context to the UCHE, which ultimately reaches the CP even while a
769          *  valid transaction for that context is still inside the CCU. This can
770          *  let the CP program config registers, which causes the "valid
771          *  transaction" inside the CCU to be interpreted differently and can
772          *  lead to a GPU fault. This bug is fixed in the latest A510 revision;
773          *  to enable the fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0
774          *  (the default is 1, i.e. disabled). For older A510 revisions this bit is unused.
775          */
776         if (adreno_is_a510(adreno_gpu))
777                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
778
779         /* Enable HWCG */
780         a5xx_set_hwcg(gpu, true);
781
782         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
783
784         /* Set the highest bank bit */
785         if (adreno_is_a540(adreno_gpu))
786                 regbit = 2;
787         else
788                 regbit = 1;
789
790         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
791         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
792
793         if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
794             adreno_is_a540(adreno_gpu))
795                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
796
797         /* Disable All flat shading optimization (ALLFLATOPTDIS) */
798         gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
799
800         /* Protect registers from the CP */
801         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
802
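        /*
         * Each CP_PROTECT entry below encodes a protected register range
         * (base offset plus length) via the ADRENO_PROTECT_RW() helper from
         * adreno_gpu.h; a CP access inside a protected range raises the
         * CP_REGISTER_PROTECTION_ERROR interrupt that is decoded in
         * a5xx_cp_err_irq() below.
         */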
803         /* RBBM */
804         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
805         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
806         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
807         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
808         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
809         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
810
811         /* Content protect */
812         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
813                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
814                         16));
815         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
816                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
817
818         /* CP */
819         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
820         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
821         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
822         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
823
824         /* RB */
825         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
826         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
827
828         /* VPC */
829         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
830         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
831
832         /* UCHE */
833         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
834
835         if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
836             adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
837             adreno_is_a530(adreno_gpu))
838                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
839                         ADRENO_PROTECT_RW(0x10000, 0x8000));
840
841         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
842         /*
843          * Disable the trusted memory range - we don't actually support secure
844          * memory rendering at this point in time and we don't want to block off
845          * part of the virtual memory space.
846          */
847         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
848                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
849         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
850
851         /* Put the GPU into 64 bit by default */
852         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
853         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
854         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
855         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
856         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
857         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
858         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
859         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
860         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
861         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
862         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
863         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
864
865         /*
866          * VPC corner case with local memory load kill leads to corrupt
867          * internal state. Normal Disable does not work for all a5x chips.
868          * So do the following setting to disable it.
869          */
870         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
871                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
872                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
873         }
874
875         ret = adreno_hw_init(gpu);
876         if (ret)
877                 return ret;
878
879         if (!(adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
880               adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu)))
881                 a5xx_gpmu_ucode_init(gpu);
882
883         ret = a5xx_ucode_init(gpu);
884         if (ret)
885                 return ret;
886
887         /* Set the ringbuffer address */
888         gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
889                 gpu->rb[0]->iova);
890
891         /*
892          * If the microcode supports the WHERE_AM_I opcode then we can use that
893          * in lieu of the RPTR shadow and enable preemption. Otherwise, we
894          * can't safely use the RPTR shadow or preemption. In either case, the
895          * RPTR shadow should be disabled in hardware.
896          */
897         gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
898                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
899
900         /* Create a privileged buffer for the RPTR shadow */
901         if (a5xx_gpu->has_whereami) {
902                 if (!a5xx_gpu->shadow_bo) {
903                         a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
904                                 sizeof(u32) * gpu->nr_rings,
905                                 MSM_BO_WC | MSM_BO_MAP_PRIV,
906                                 gpu->aspace, &a5xx_gpu->shadow_bo,
907                                 &a5xx_gpu->shadow_iova);
908
909                         if (IS_ERR(a5xx_gpu->shadow))
910                                 return PTR_ERR(a5xx_gpu->shadow);
911                 }
912
913                 gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
914                         REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
915         } else if (gpu->nr_rings > 1) {
916                 /* Disable preemption if WHERE_AM_I isn't available */
917                 a5xx_preempt_fini(gpu);
918                 gpu->nr_rings = 1;
919         }
920
921         a5xx_preempt_hw_init(gpu);
922
923         /* Disable the interrupts through the initial bringup stage */
924         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
925
926         /* Clear ME_HALT to start the micro engine */
927         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
928         ret = a5xx_me_init(gpu);
929         if (ret)
930                 return ret;
931
932         ret = a5xx_power_init(gpu);
933         if (ret)
934                 return ret;
935
936         /*
937          * Send a pipeline event stat to get misbehaving counters to start
938          * ticking correctly
939          */
940         if (adreno_is_a530(adreno_gpu)) {
941                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
942                 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
943
944                 a5xx_flush(gpu, gpu->rb[0], true);
945                 if (!a5xx_idle(gpu, gpu->rb[0]))
946                         return -EINVAL;
947         }
948
949         /*
950          * If the chip we are using supports loading a zap shader, try to load
951          * it into the secure world. If successful we can use the CP to switch
952          * out of secure mode. If not, we have no recourse but to try to
953          * switch ourselves out manually. If we
954          * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
955          * be blocked and a permissions violation will soon follow.
956          */
957         ret = a5xx_zap_shader_init(gpu);
958         if (!ret) {
959                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
960                 OUT_RING(gpu->rb[0], 0x00000000);
961
962                 a5xx_flush(gpu, gpu->rb[0], true);
963                 if (!a5xx_idle(gpu, gpu->rb[0]))
964                         return -EINVAL;
965         } else if (ret == -ENODEV) {
966                 /*
967                  * This device does not use zap shader (but print a warning
968                  * just in case someone got their dt wrong.. hopefully they
969                  * have a debug UART to realize the error of their ways...
970                  * if you mess this up you are about to crash horribly)
971                  */
972                 dev_warn_once(gpu->dev->dev,
973                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
974                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
975         } else {
976                 return ret;
977         }
978
979         /* Last step - yield the ringbuffer */
980         a5xx_preempt_start(gpu);
981
982         return 0;
983 }
984
985 static void a5xx_recover(struct msm_gpu *gpu)
986 {
987         int i;
988
989         adreno_dump_info(gpu);
990
991         for (i = 0; i < 8; i++) {
992                 printk("CP_SCRATCH_REG%d: %u\n", i,
993                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
994         }
995
996         if (hang_debug)
997                 a5xx_dump(gpu);
998
999         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
1000         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
1001         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
1002         adreno_recover(gpu);
1003 }
1004
1005 static void a5xx_destroy(struct msm_gpu *gpu)
1006 {
1007         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1008         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1009
1010         DBG("%s", gpu->name);
1011
1012         a5xx_preempt_fini(gpu);
1013
1014         if (a5xx_gpu->pm4_bo) {
1015                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
1016                 drm_gem_object_put(a5xx_gpu->pm4_bo);
1017         }
1018
1019         if (a5xx_gpu->pfp_bo) {
1020                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
1021                 drm_gem_object_put(a5xx_gpu->pfp_bo);
1022         }
1023
1024         if (a5xx_gpu->gpmu_bo) {
1025                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
1026                 drm_gem_object_put(a5xx_gpu->gpmu_bo);
1027         }
1028
1029         if (a5xx_gpu->shadow_bo) {
1030                 msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
1031                 drm_gem_object_put(a5xx_gpu->shadow_bo);
1032         }
1033
1034         adreno_gpu_cleanup(adreno_gpu);
1035         kfree(a5xx_gpu);
1036 }
1037
1038 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
1039 {
1040         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
1041                 return false;
1042
1043         /*
1044          * Nearly every abnormality ends up pausing the GPU and triggering a
1045          * fault so we can safely just watch for this one interrupt to fire
1046          */
1047         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
1048                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
1049 }
1050
1051 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1052 {
1053         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1054         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1055
1056         if (ring != a5xx_gpu->cur_ring) {
1057                 WARN(1, "Tried to idle a non-current ringbuffer\n");
1058                 return false;
1059         }
1060
1061         /* wait for CP to drain ringbuffer: */
1062         if (!adreno_idle(gpu, ring))
1063                 return false;
1064
1065         if (spin_until(_a5xx_check_idle(gpu))) {
1066                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
1067                         gpu->name, __builtin_return_address(0),
1068                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1069                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
1070                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1071                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
1072                 return false;
1073         }
1074
1075         return true;
1076 }
1077
1078 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1079 {
1080         struct msm_gpu *gpu = arg;
1081         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
1082                         iova, flags,
1083                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
1084                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
1085                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
1086                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
1087
1088         return 0;
1089 }
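/*
 * This handler is hooked up to the GPU's address space at init time; a rough
 * sketch of the registration (assuming the msm_mmu_set_fault_handler() helper
 * from msm_mmu.h) looks like:
 *
 *	if (gpu->aspace)
 *		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
 *					  a5xx_fault_handler);
 */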
1090
1091 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
1092 {
1093         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
1094
1095         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
1096                 u32 val;
1097
1098                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
1099
1100                 /*
1101                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
1102                  * read it twice
1103                  */
1104
1105                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1106                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1107
1108                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
1109                         val);
1110         }
1111
1112         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
1113                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
1114                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
1115
1116         if (status & A5XX_CP_INT_CP_DMA_ERROR)
1117                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
1118
1119         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1120                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
1121
1122                 dev_err_ratelimited(gpu->dev->dev,
1123                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1124                         val & (1 << 24) ? "WRITE" : "READ",
1125                         (val & 0xFFFFF) >> 2, val);
1126         }
1127
1128         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
1129                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
1130                 const char *access[16] = { "reserved", "reserved",
1131                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
1132                         "", "", "me read", "me write", "", "", "crashdump read",
1133                         "crashdump write" };
1134
1135                 dev_err_ratelimited(gpu->dev->dev,
1136                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
1137                         status & 0xFFFFF, access[(status >> 24) & 0xF],
1138                         (status & (1 << 31)), status);
1139         }
1140 }
1141
1142 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1143 {
1144         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1145                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1146
1147                 dev_err_ratelimited(gpu->dev->dev,
1148                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1149                         val & (1 << 28) ? "WRITE" : "READ",
1150                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1151                         (val >> 24) & 0xF);
1152
1153                 /* Clear the error */
1154                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1155
1156                 /* Clear the interrupt */
1157                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1158                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1159         }
1160
1161         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1162                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1163
1164         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1165                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1166                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1167
1168         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1169                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1170                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1171
1172         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1173                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1174                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1175
1176         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1177                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1178
1179         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1180                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1181 }
1182
1183 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1184 {
1185         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1186
1187         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1188
1189         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1190                 addr);
1191 }
1192
1193 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1194 {
1195         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1196 }
1197
1198 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1199 {
1200         struct drm_device *dev = gpu->dev;
1201         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1202
1203         /*
1204          * If stalled on SMMU fault, we could trip the GPU's hang detection,
1205          * but the fault handler will trigger the devcore dump, and we want
1206          * to otherwise resume normally rather than killing the submit, so
1207          * just bail.
1208          */
1209         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
1210                 return;
1211
1212         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1213                 ring ? ring->id : -1, ring ? ring->seqno : 0,
1214                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1215                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1216                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1217                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1218                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1219                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1220                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1221
1222         /* Turn off the hangcheck timer to keep it from bothering us */
1223         del_timer(&gpu->hangcheck_timer);
1224
1225         kthread_queue_work(gpu->worker, &gpu->recover_work);
1226 }
1227
1228 #define RBBM_ERROR_MASK \
1229         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1230         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1231         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1232         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1233         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1234         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1235
1236 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1237 {
1238         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1239
1240         /*
1241          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1242          * before the source is cleared the interrupt will storm.
1243          */
1244         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1245                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1246
1247         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1248         if (status & RBBM_ERROR_MASK)
1249                 a5xx_rbbm_err_irq(gpu, status);
1250
1251         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1252                 a5xx_cp_err_irq(gpu);
1253
1254         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1255                 a5xx_fault_detect_irq(gpu);
1256
1257         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1258                 a5xx_uche_err_irq(gpu);
1259
1260         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1261                 a5xx_gpmu_err_irq(gpu);
1262
1263         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1264                 a5xx_preempt_trigger(gpu);
1265                 msm_gpu_retire(gpu);
1266         }
1267
1268         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1269                 a5xx_preempt_irq(gpu);
1270
1271         return IRQ_HANDLED;
1272 }
1273
1274 static const u32 a5xx_registers[] = {
1275         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1276         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1277         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1278         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1279         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1280         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1281         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1282         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1283         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1284         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1285         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1286         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1287         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1288         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1289         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1290         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1291         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1292         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1293         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1294         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1295         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1296         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1297         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1298         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1299         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1300         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1301         0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1302         0xAC60, 0xAC60, ~0,
1303 };
1304
1305 static void a5xx_dump(struct msm_gpu *gpu)
1306 {
1307         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1308                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1309         adreno_dump(gpu);
1310 }
1311
1312 static int a5xx_pm_resume(struct msm_gpu *gpu)
1313 {
1314         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1315         int ret;
1316
1317         /* Turn on the core power */
1318         ret = msm_gpu_pm_resume(gpu);
1319         if (ret)
1320                 return ret;
1321
1322         /* Adreno 508, 509, 510 and 512 need manual RBBM suspend/resume control */
1323         if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
1324                 /* Halt the sp_input_clk at HM level */
1325                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1326                 a5xx_set_hwcg(gpu, true);
1327                 /* Turn on sp_input_clk at HM level */
1328                 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1329                 return 0;
1330         }
1331
1332         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1333         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1334
1335         /* Wait 3 usecs before polling */
1336         udelay(3);
1337
1338         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1339                 (1 << 20), (1 << 20));
1340         if (ret) {
1341                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1342                         gpu->name,
1343                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1344                 return ret;
1345         }
1346
1347         /* Turn on the SP domain */
1348         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1349         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1350                 (1 << 20), (1 << 20));
1351         if (ret)
1352                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1353                         gpu->name);
1354
1355         return ret;
1356 }
1357
1358 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1359 {
1360         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1361         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1362         u32 mask = 0xf;
1363         int i, ret;
1364
1365         /* A508, A510 have 3 XIN ports in VBIF */
1366         if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
1367                 mask = 0x7;
1368
1369         /* Clear the VBIF pipe before shutting down */
1370         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1371         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1372                                 mask) == mask);
1373
1374         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1375
1376         /*
1377          * Reset the VBIF before power collapse to avoid issues with FIFO
1378          * entries on Adreno A510 and A530 (the others will tend to lock up)
1379          */
1380         if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
1381                 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1382                 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1383         }
1384
1385         ret = msm_gpu_pm_suspend(gpu);
1386         if (ret)
1387                 return ret;
1388
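        /*
         * Clear the rptr shadow so stale values are not read back after power
         * collapse; the CP repopulates it via CP_WHERE_AM_I on the next flush.
         */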
1389         if (a5xx_gpu->has_whereami)
1390                 for (i = 0; i < gpu->nr_rings; i++)
1391                         a5xx_gpu->shadow[i] = 0;
1392
1393         return 0;
1394 }
1395
1396 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1397 {
1398         *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
1399                 REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
1400
1401         return 0;
1402 }
1403
1404 struct a5xx_crashdumper {
1405         void *ptr;
1406         struct drm_gem_object *bo;
1407         u64 iova;
1408 };
1409
1410 struct a5xx_gpu_state {
1411         struct msm_gpu_state base;
1412         u32 *hlsqregs;
1413 };
1414
1415 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1416                 struct a5xx_crashdumper *dumper)
1417 {
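        /*
         * A single 1MB buffer holds both the crashdump script (written at
         * offset 0) and the captured register data (written from offset 256K,
         * see a5xx_gpu_state_get_hlsq_regs() below).
         */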
1418         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1419                 SZ_1M, MSM_BO_WC, gpu->aspace,
1420                 &dumper->bo, &dumper->iova);
1421
1422         if (!IS_ERR(dumper->ptr))
1423                 msm_gem_object_set_name(dumper->bo, "crashdump");
1424
1425         return PTR_ERR_OR_ZERO(dumper->ptr);
1426 }
1427
1428 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1429                 struct a5xx_crashdumper *dumper)
1430 {
1431         u32 val;
1432
1433         if (IS_ERR_OR_NULL(dumper->ptr))
1434                 return -EINVAL;
1435
1436         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1437                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1438
1439         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1440
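        /* Poll (every 100us, up to 10ms) for what is assumed to be the 'done' bit */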
1441         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1442                 val & 0x04, 100, 10000);
1443 }
1444
1445 /*
1446  * This is the list of registers that need to be read through the HLSQ
1447  * aperture via the crashdumper, since they are not normally accessible
1448  * from the CPU on a secure platform.
1449  */
1450 static const struct {
1451         u32 type;
1452         u32 regoffset;
1453         u32 count;
1454 } a5xx_hlsq_aperture_regs[] = {
1455         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1456         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1457         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1458         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1459         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1460         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1461         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1462         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1463         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1464         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1465         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1466         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1467         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1468         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1469         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1470 };
1471
1472 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1473                 struct a5xx_gpu_state *a5xx_state)
1474 {
1475         struct a5xx_crashdumper dumper = { 0 };
1476         u32 offset, count = 0;
1477         u64 *ptr;
1478         int i;
1479
1480         if (a5xx_crashdumper_init(gpu, &dumper))
1481                 return;
1482
1483         /* The script will be written at offset 0 */
1484         ptr = dumper.ptr;
1485
1486         /* Start writing the data at offset 256k */
1487         offset = dumper.iova + (256 * SZ_1K);
1488
1489         /* Count how many additional registers to get from the HLSQ aperture */
1490         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1491                 count += a5xx_hlsq_aperture_regs[i].count;
1492
1493         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1494         if (!a5xx_state->hlsqregs)
1495                 return;
1496
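        /*
         * Each script entry is a pair of 64-bit words; as used below, setting
         * bit 21 in the second word writes the value from the first word to
         * the register encoded at bit 44 and up, while leaving it clear copies
         * 'count' dwords from that register to the IOVA in the first word.
         */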
1497         /* Build the crashdump script */
1498         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1499                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1500                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1501
1502                 /* Write the register to select the desired bank */
1503                 *ptr++ = ((u64) type << 8);
1504                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1505                         (1 << 21) | 1;
1506
1507                 *ptr++ = offset;
1508                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1509                         | c;
1510
1511                 offset += c * sizeof(u32);
1512         }
1513
1514         /* Write two zeros to close off the script */
1515         *ptr++ = 0;
1516         *ptr++ = 0;
1517
1518         if (a5xx_crashdumper_run(gpu, &dumper)) {
1519                 kfree(a5xx_state->hlsqregs);
1520                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1521                 return;
1522         }
1523
1524         /* Copy the data from the crashdumper to the state */
1525         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1526                 count * sizeof(u32));
1527
1528         msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1529 }
1530
1531 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1532 {
1533         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1534                         GFP_KERNEL);
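        /* RBBM_STATUS3 bit 24 is set while the GPU is stalled on an SMMU fault */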
1535         bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
1536
1537         if (!a5xx_state)
1538                 return ERR_PTR(-ENOMEM);
1539
1540         /* Temporarily disable hardware clock gating before reading the hw */
1541         a5xx_set_hwcg(gpu, false);
1542
1543         /* First get the generic state from the adreno core */
1544         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1545
1546         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1547
1548         /*
1549          * Get the HLSQ regs with the help of the crashdumper, but only if
1550          * we are not stalled in an iommu fault (in which case the crashdumper
1551          * would not have access to memory)
1552          */
1553         if (!stalled)
1554                 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1555
1556         a5xx_set_hwcg(gpu, true);
1557
1558         return &a5xx_state->base;
1559 }
1560
1561 static void a5xx_gpu_state_destroy(struct kref *kref)
1562 {
1563         struct msm_gpu_state *state = container_of(kref,
1564                 struct msm_gpu_state, ref);
1565         struct a5xx_gpu_state *a5xx_state = container_of(state,
1566                 struct a5xx_gpu_state, base);
1567
1568         kfree(a5xx_state->hlsqregs);
1569
1570         adreno_gpu_state_destroy(state);
1571         kfree(a5xx_state);
1572 }
1573
1574 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1575 {
1576         if (IS_ERR_OR_NULL(state))
1577                 return 1;
1578
1579         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1580 }
1581
1582
1583 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1584 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1585                       struct drm_printer *p)
1586 {
1587         int i, j;
1588         u32 pos = 0;
1589         struct a5xx_gpu_state *a5xx_state = container_of(state,
1590                 struct a5xx_gpu_state, base);
1591
1592         if (IS_ERR_OR_NULL(state))
1593                 return;
1594
1595         adreno_show(gpu, state, p);
1596
1597         /* Dump the additional a5xx HLSQ registers */
1598         if (!a5xx_state->hlsqregs)
1599                 return;
1600
1601         drm_printf(p, "registers-hlsq:\n");
1602
1603         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1604                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1605                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1606
1607                 for (j = 0; j < c; j++, pos++, o++) {
1608                         /*
1609                          * To keep the crashdump simple we pull the entire range
1610                          * for each register type, but not all of the registers
1611                          * in the range are valid.  Fortunately, invalid registers
1612                          * stick out like a sore thumb with a value of
1613                          * 0xdeadbeef.
1614                          */
1615                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1616                                 continue;
1617
1618                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1619                                 o << 2, a5xx_state->hlsqregs[pos]);
1620                 }
1621         }
1622 }
1623 #endif
1624
1625 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1626 {
1627         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1628         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1629
1630         return a5xx_gpu->cur_ring;
1631 }
1632
1633 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1634 {
1635         u64 busy_cycles, busy_time;
1636
1637         /* Only read the gpu busy if the hardware is already active */
1638         if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1639                 return 0;
1640
1641         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1642                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1643
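        /*
         * Dividing the cycle delta by the core clock rate in MHz (cycles per
         * microsecond) converts it to busy time in usecs.
         */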
1644         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1645         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1646
1647         gpu->devfreq.busy_cycles = busy_cycles;
1648
1649         pm_runtime_put(&gpu->pdev->dev);
1650
1651         if (WARN_ON(busy_time > ~0LU))
1652                 return ~0LU;
1653
1654         return (unsigned long)busy_time;
1655 }
1656
1657 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1658 {
1659         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1660         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1661
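        /*
         * With WHERE_AM_I support the CP keeps the read pointer up to date in
         * the per-ring shadow buffer; otherwise read CP_RB_RPTR directly.
         */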
1662         if (a5xx_gpu->has_whereami)
1663                 return a5xx_gpu->shadow[ring->id];
1664
1665         return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1666 }
1667
1668 static const struct adreno_gpu_funcs funcs = {
1669         .base = {
1670                 .get_param = adreno_get_param,
1671                 .hw_init = a5xx_hw_init,
1672                 .pm_suspend = a5xx_pm_suspend,
1673                 .pm_resume = a5xx_pm_resume,
1674                 .recover = a5xx_recover,
1675                 .submit = a5xx_submit,
1676                 .active_ring = a5xx_active_ring,
1677                 .irq = a5xx_irq,
1678                 .destroy = a5xx_destroy,
1679 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1680                 .show = a5xx_show,
1681 #endif
1682 #if defined(CONFIG_DEBUG_FS)
1683                 .debugfs_init = a5xx_debugfs_init,
1684 #endif
1685                 .gpu_busy = a5xx_gpu_busy,
1686                 .gpu_state_get = a5xx_gpu_state_get,
1687                 .gpu_state_put = a5xx_gpu_state_put,
1688                 .create_address_space = adreno_iommu_create_address_space,
1689                 .get_rptr = a5xx_get_rptr,
1690         },
1691         .get_timestamp = a5xx_get_timestamp,
1692 };
1693
1694 static void check_speed_bin(struct device *dev)
1695 {
1696         struct nvmem_cell *cell;
1697         u32 val;
1698
1699         /*
1700          * If the OPP table specifies an opp-supported-hw property then we have
1701          * to set something with dev_pm_opp_set_supported_hw() or the table
1702          * doesn't get populated.  So pick an arbitrary value that should
1703          * ensure the default frequencies are selected but doesn't conflict
1704          * with any actual speed bins.
1705          */
1706         val = 0x80;
1707
1708         cell = nvmem_cell_get(dev, "speed_bin");
1709
1710         if (!IS_ERR(cell)) {
1711                 void *buf = nvmem_cell_read(cell, NULL);
1712
1713                 if (!IS_ERR(buf)) {
1714                         u8 bin = *((u8 *) buf);
1715
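                        /*
                         * Each bin selects one bit, e.g. a fused bin of 2
                         * gives val = BIT(2) = 0x4, which the OPP core
                         * matches against the opp-supported-hw masks.
                         */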
1716                         val = (1 << bin);
1717                         kfree(buf);
1718                 }
1719
1720                 nvmem_cell_put(cell);
1721         }
1722
1723         devm_pm_opp_set_supported_hw(dev, &val, 1);
1724 }
1725
1726 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1727 {
1728         struct msm_drm_private *priv = dev->dev_private;
1729         struct platform_device *pdev = priv->gpu_pdev;
1730         struct a5xx_gpu *a5xx_gpu = NULL;
1731         struct adreno_gpu *adreno_gpu;
1732         struct msm_gpu *gpu;
1733         int ret;
1734
1735         if (!pdev) {
1736                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1737                 return ERR_PTR(-ENXIO);
1738         }
1739
1740         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1741         if (!a5xx_gpu)
1742                 return ERR_PTR(-ENOMEM);
1743
1744         adreno_gpu = &a5xx_gpu->base;
1745         gpu = &adreno_gpu->base;
1746
1747         adreno_gpu->registers = a5xx_registers;
1748
1749         a5xx_gpu->lm_leakage = 0x4E001A;
1750
1751         check_speed_bin(&pdev->dev);
1752
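        /* A5XX supports up to 4 ringbuffers, one per preemption priority level */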
1753         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1754         if (ret) {
1755                 a5xx_destroy(&(a5xx_gpu->base.base));
1756                 return ERR_PTR(ret);
1757         }
1758
1759         if (gpu->aspace)
1760                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1761
1762         /* Set up the preemption specific bits and pieces for each ringbuffer */
1763         a5xx_preempt_init(gpu);
1764
1765         return gpu;
1766 }