1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
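/*
 * Peripheral Authentication Service (PAS) identifier for the GPU, passed to
 * the secure world (via qcom_scm) when loading or resuming the zap shader.
 */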
19 #define GPU_PAS_ID 13
20
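/*
 * Ask the CP to write its current read pointer to the shadow location in
 * memory; this only works when the firmware supports the CP_WHERE_AM_I
 * opcode (a5xx_gpu->has_whereami).
 */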
21 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25
26         if (a5xx_gpu->has_whereami) {
27                 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
28                 OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
29                 OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
30         }
31 }
32
33 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
34                 bool sync)
35 {
36         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
37         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
38         uint32_t wptr;
39         unsigned long flags;
40
41         /*
42          * Most flush operations need to issue a WHERE_AM_I opcode to sync up
43          * the rptr shadow
44          */
45         if (sync)
46                 update_shadow_rptr(gpu, ring);
47
48         spin_lock_irqsave(&ring->preempt_lock, flags);
49
50         /* Copy the shadow to the actual register */
51         ring->cur = ring->next;
52
53         /* Make sure to wrap wptr if we need to */
54         wptr = get_wptr(ring);
55
56         spin_unlock_irqrestore(&ring->preempt_lock, flags);
57
58         /* Make sure everything is posted before making a decision */
59         mb();
60
61         /* Update HW if this is the current ring and we are not in preempt */
62         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
63                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
64 }
65
66 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
67 {
68         struct msm_drm_private *priv = gpu->dev->dev_private;
69         struct msm_ringbuffer *ring = submit->ring;
70         struct msm_gem_object *obj;
71         uint32_t *ptr, dwords;
72         unsigned int i, j;
73
74         for (i = 0; i < submit->nr_cmds; i++) {
75                 switch (submit->cmd[i].type) {
76                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
77                         break;
78                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
79                         if (priv->lastctx == submit->queue->ctx)
80                                 break;
81                         fallthrough;
82                 case MSM_SUBMIT_CMD_BUF:
83                         /* copy commands into RB: */
84                         obj = submit->bos[submit->cmd[i].idx].obj;
85                         dwords = submit->cmd[i].size;
86
87                         ptr = msm_gem_get_vaddr(&obj->base);
88
89                         /* _get_vaddr() shouldn't fail at this point,
90                          * since we've already mapped it once in
91                          * submit_reloc()
92                          */
93                         if (WARN_ON(!ptr))
94                                 return;
95
96                         for (j = 0; j < dwords; j++) {
97                                 /* Normally OUT_PKTn() would wait for
98                                  * space for the packet, but since we
99                                  * just OUT_RING() the whole thing, we
100                                  * need to call adreno_wait_ring()
101                                  * ourselves (j keeps the outer cmd index intact):
102                                  */
103                                 adreno_wait_ring(ring, 1);
104                                 OUT_RING(ring, ptr[j]);
105                         }
106
107                         msm_gem_put_vaddr(&obj->base);
108
109                         break;
110                 }
111         }
112
113         a5xx_flush(gpu, ring, true);
114         a5xx_preempt_trigger(gpu);
115
116         /* we might not necessarily have a cmd from userspace to
117          * trigger an event to know that submit has completed, so
118          * do this manually:
119          */
120         a5xx_idle(gpu, ring);
121         ring->memptrs->fence = submit->seqno;
122         msm_gpu_retire(gpu);
123 }
124
125 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
126 {
127         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
128         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
129         struct msm_drm_private *priv = gpu->dev->dev_private;
130         struct msm_ringbuffer *ring = submit->ring;
131         unsigned int i, ibs = 0;
132
133         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
134                 priv->lastctx = NULL;
135                 a5xx_submit_in_rb(gpu, submit);
136                 return;
137         }
138
139         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
140         OUT_RING(ring, 0x02);
141
142         /* Turn off protected mode to write to special registers */
143         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
144         OUT_RING(ring, 0);
145
146         /* Set the save preemption record for the ring/command */
147         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
148         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
149         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
150
151         /* Turn back on protected mode */
152         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
153         OUT_RING(ring, 1);
154
155         /* Enable local preemption for fine-grained preemption */
156         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
157         OUT_RING(ring, 0x02);
158
159         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
160         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
161         OUT_RING(ring, 0x02);
162
163         /* Submit the commands */
164         for (i = 0; i < submit->nr_cmds; i++) {
165                 switch (submit->cmd[i].type) {
166                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
167                         break;
168                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
169                         if (priv->lastctx == submit->queue->ctx)
170                                 break;
171                         fallthrough;
172                 case MSM_SUBMIT_CMD_BUF:
173                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
174                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
175                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
176                         OUT_RING(ring, submit->cmd[i].size);
177                         ibs++;
178                         break;
179                 }
180
181                 /*
182                  * Periodically update the shadow rptr if needed, so that we
183                  * can see partial progress of submits with large # of
184                  * cmds.. otherwise we could needlessly stall waiting for
185                  * ringbuffer state, simply due to looking at a shadow
186                  * rptr value that has not been updated
187                  */
188                 if ((ibs % 32) == 0)
189                         update_shadow_rptr(gpu, ring);
190         }
191
192         /*
193          * Write the render mode to NULL (0) to indicate to the CP that the IBs
194          * are done rendering - otherwise a lucky preemption would start
195          * replaying from the last checkpoint
196          */
197         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
198         OUT_RING(ring, 0);
199         OUT_RING(ring, 0);
200         OUT_RING(ring, 0);
201         OUT_RING(ring, 0);
202         OUT_RING(ring, 0);
203
204         /* Turn off IB level preemptions */
205         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
206         OUT_RING(ring, 0x01);
207
208         /* Write the fence to the scratch register */
209         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
210         OUT_RING(ring, submit->seqno);
211
212         /*
213          * Execute a CACHE_FLUSH_TS event. This will ensure that the
214          * timestamp is written to the memory and then triggers the interrupt
215          */
216         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
217         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
218                 CP_EVENT_WRITE_0_IRQ);
219         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
220         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
221         OUT_RING(ring, submit->seqno);
222
223         /* Yield the floor on command completion */
224         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
225         /*
226          * If dword[2:1] are non zero, they specify an address for the CP to
227          * write the value of dword[3] to on preemption complete. Write 0 to
228          * skip the write
229          */
230         OUT_RING(ring, 0x00);
231         OUT_RING(ring, 0x00);
232         /* Data value - not used if the address above is 0 */
233         OUT_RING(ring, 0x01);
234         /* Set bit 0 to trigger an interrupt on preempt complete */
235         OUT_RING(ring, 0x01);
236
237         /* A WHERE_AM_I packet is not needed after a YIELD */
238         a5xx_flush(gpu, ring, false);
239
240         /* Check to see if we need to start preemption */
241         a5xx_preempt_trigger(gpu);
242 }
243
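/*
 * Hardware clock gating (HWCG) register/value tables. a5xx_set_hwcg() below
 * selects a50x_hwcg for a508, a512_hwcg for a509/a512, and a5xx_hwcg for the
 * remaining a5xx parts.
 */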
244 static const struct adreno_five_hwcg_regs {
245         u32 offset;
246         u32 value;
247 } a5xx_hwcg[] = {
248         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
249         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
250         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
251         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
252         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
253         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
254         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
255         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
256         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
257         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
258         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
259         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
260         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
261         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
262         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
263         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
264         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
265         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
266         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
267         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
268         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
269         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
270         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
271         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
272         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
273         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
274         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
275         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
276         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
277         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
278         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
279         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
280         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
281         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
282         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
283         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
284         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
285         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
286         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
287         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
288         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
289         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
290         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
291         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
292         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
293         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
294         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
295         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
296         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
297         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
298         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
299         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
300         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
301         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
302         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
303         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
304         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
305         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
306         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
307         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
308         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
309         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
310         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
311         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
312         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
313         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
314         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
315         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
316         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
317         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
318         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
319         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
320         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
321         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
322         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
323         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
324         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
325         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
326         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
327         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
328         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
329         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
330         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
331         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
332         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
333         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
334         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
335         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
336         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
337         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
338         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
339         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
340 }, a50x_hwcg[] = {
341         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
342         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
343         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
344         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
345         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
346         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
347         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
348         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
349         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
350         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
351         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
352         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
353         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
354         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
355         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
356         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
357         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
358         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
359         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
360         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
361         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
362         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
363         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
364         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
365         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
366         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
367         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
368         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
369         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
370         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
371         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
372         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
373         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
374         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
375         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
376         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
377         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
378         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
379 }, a512_hwcg[] = {
380         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
381         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
382         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
383         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
384         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
385         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
386         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
387         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
388         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
389         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
390         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
391         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
392         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
393         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
394         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
395         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
396         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
397         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
398         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
399         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
400         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
401         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
402         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
403         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
404         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
405         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
406         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
407         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
408         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
409         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
410         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
411         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
412         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
413         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
414         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
415         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
416         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
417         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
418         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
419         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
420         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
421         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
422         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
423         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
424         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
425         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
426         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
427         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
428         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
429         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
430         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
431         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
432         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
433         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
434         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
435         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
436 };
437
438 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
439 {
440         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
441         const struct adreno_five_hwcg_regs *regs;
442         unsigned int i, sz;
443
444         if (adreno_is_a508(adreno_gpu)) {
445                 regs = a50x_hwcg;
446                 sz = ARRAY_SIZE(a50x_hwcg);
447         } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
448                 regs = a512_hwcg;
449                 sz = ARRAY_SIZE(a512_hwcg);
450         } else {
451                 regs = a5xx_hwcg;
452                 sz = ARRAY_SIZE(a5xx_hwcg);
453         }
454
455         for (i = 0; i < sz; i++)
456                 gpu_write(gpu, regs[i].offset,
457                           state ? regs[i].value : 0);
458
459         if (adreno_is_a540(adreno_gpu)) {
460                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
461                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
462         }
463
464         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
465         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
466 }
467
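/* One-time CP micro engine initialization, issued through ring 0 */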
468 static int a5xx_me_init(struct msm_gpu *gpu)
469 {
470         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
471         struct msm_ringbuffer *ring = gpu->rb[0];
472
473         OUT_PKT7(ring, CP_ME_INIT, 8);
474
475         OUT_RING(ring, 0x0000002F);
476
477         /* Enable multiple hardware contexts */
478         OUT_RING(ring, 0x00000003);
479
480         /* Enable error detection */
481         OUT_RING(ring, 0x20000000);
482
483         /* Don't enable header dump */
484         OUT_RING(ring, 0x00000000);
485         OUT_RING(ring, 0x00000000);
486
487         /* Specify workarounds for various microcode issues */
488         if (adreno_is_a530(adreno_gpu)) {
489                 /* Workaround for token end syncs
490                  * Force a WFI after every direct-render 3D mode draw and every
491                  * 2D mode 3 draw
492                  */
493                 OUT_RING(ring, 0x0000000B);
494         } else if (adreno_is_a510(adreno_gpu)) {
495                 /* Workaround for token and syncs */
496                 OUT_RING(ring, 0x00000001);
497         } else {
498                 /* No workarounds enabled */
499                 OUT_RING(ring, 0x00000000);
500         }
501
502         OUT_RING(ring, 0x00000000);
503         OUT_RING(ring, 0x00000000);
504
505         a5xx_flush(gpu, ring, true);
506         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
507 }
508
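/*
 * Prime the preemption state from ring 0: program the save record address,
 * enable local preemption and yields, and then yield the floor so later
 * preempt switches start from a known state. Skipped when only one ring is
 * in use.
 */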
509 static int a5xx_preempt_start(struct msm_gpu *gpu)
510 {
511         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
512         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
513         struct msm_ringbuffer *ring = gpu->rb[0];
514
515         if (gpu->nr_rings == 1)
516                 return 0;
517
518         /* Turn off protected mode to write to special registers */
519         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
520         OUT_RING(ring, 0);
521
522         /* Set the save preemption record for the ring/command */
523         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
524         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
525         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
526
527         /* Turn back on protected mode */
528         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
529         OUT_RING(ring, 1);
530
531         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
532         OUT_RING(ring, 0x00);
533
534         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
535         OUT_RING(ring, 0x01);
536
537         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
538         OUT_RING(ring, 0x01);
539
540         /* Yield the floor on command completion */
541         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
542         OUT_RING(ring, 0x00);
543         OUT_RING(ring, 0x00);
544         OUT_RING(ring, 0x01);
545         OUT_RING(ring, 0x01);
546
547         /* The WHERE_AM_I packet is not needed after a YIELD is issued */
548         a5xx_flush(gpu, ring, false);
549
550         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
551 }
552
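/*
 * Inspect the PFP firmware to determine whether it supports the
 * CP_WHERE_AM_I opcode, which is required to use the RPTR shadow (and thus
 * preemption) safely.
 */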
553 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
554                 struct drm_gem_object *obj)
555 {
556         u32 *buf = msm_gem_get_vaddr(obj);
557
558         if (IS_ERR(buf))
559                 return;
560
561         /*
562          * If the lowest nibble is 0xa that is an indication that this microcode
563          * has been patched. The actual version is in dword [3] but we only care
564          * about the patchlevel which is the lowest nibble of dword [3]
565          */
566         if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
567                 a5xx_gpu->has_whereami = true;
568
569         msm_gem_put_vaddr(obj);
570 }
571
572 static int a5xx_ucode_init(struct msm_gpu *gpu)
573 {
574         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
575         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
576         int ret;
577
578         if (!a5xx_gpu->pm4_bo) {
579                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
580                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
581
582
583                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
584                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
585                         a5xx_gpu->pm4_bo = NULL;
586                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
587                                 ret);
588                         return ret;
589                 }
590
591                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
592         }
593
594         if (!a5xx_gpu->pfp_bo) {
595                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
596                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
597
598                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
599                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
600                         a5xx_gpu->pfp_bo = NULL;
601                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
602                                 ret);
603                         return ret;
604                 }
605
606                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
607                 a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
608         }
609
610         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
611                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
612
613         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
614                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
615
616         return 0;
617 }
618
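/*
 * Remote state value passed to the secure world to re-initialize a zap
 * shader image that has already been loaded (see a5xx_zap_shader_resume()).
 */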
619 #define SCM_GPU_ZAP_SHADER_RESUME 0
620
621 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
622 {
623         int ret;
624
625         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
626         if (ret)
627                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
628                         gpu->name, ret);
629
630         return ret;
631 }
632
633 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
634 {
635         static bool loaded;
636         int ret;
637
638         /*
639          * If the zap shader is already loaded into memory we just need to kick
640          * the remote processor to reinitialize it
641          */
642         if (loaded)
643                 return a5xx_zap_shader_resume(gpu);
644
645         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
646
647         loaded = !ret;
648         return ret;
649 }
650
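/* Interrupt sources enabled at runtime; each bit is dispatched in a5xx_irq() */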
651 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
652           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
653           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
654           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
655           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
656           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
657           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
658           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
659           A5XX_RBBM_INT_0_MASK_CP_SW | \
660           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
661           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
662           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
663
664 static int a5xx_hw_init(struct msm_gpu *gpu)
665 {
666         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
667         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
668         u32 regbit;
669         int ret;
670
671         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
672
673         if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
674             adreno_is_a540(adreno_gpu))
675                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
676
677         /* Make all blocks contribute to the GPU BUSY perf counter */
678         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
679
680         /* Enable RBBM error reporting bits */
681         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
682
683         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
684                 /*
685                  * Mask out the activity signals from RB1-3 to avoid false
686                  * positives
687                  */
688
689                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
690                         0xF0000000);
691                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
692                         0xFFFFFFFF);
693                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
694                         0xFFFFFFFF);
695                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
696                         0xFFFFFFFF);
697                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
698                         0xFFFFFFFF);
699                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
700                         0xFFFFFFFF);
701                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
702                         0xFFFFFFFF);
703                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
704                         0xFFFFFFFF);
705         }
706
707         /* Enable fault detection */
708         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
709                 (1 << 30) | 0xFFFF);
710
711         /* Turn on performance counters */
712         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
713
714         /* Select CP0 to always count cycles */
715         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
716
717         /* Select countable 6 on RBBM counter 0 to get the busy status for devfreq */
718         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
719
720         /* Increase VFD cache access so LRZ and other data gets evicted less */
721         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
722
723         /* Disable L2 bypass in the UCHE */
724         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
725         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
726         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
727         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
728
729         /* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
730         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
731         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
732         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
733                 0x00100000 + adreno_gpu->gmem - 1);
734         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
735
736         if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) {
737                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
738                 if (adreno_is_a508(adreno_gpu))
739                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
740                 else
741                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
742                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
743                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
744         } else {
745                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
746                 if (adreno_is_a530(adreno_gpu))
747                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
748                 else
749                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
750                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
751                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
752         }
753
754         if (adreno_is_a508(adreno_gpu))
755                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
756                           (0x100 << 11 | 0x100 << 22));
757         else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
758                  adreno_is_a512(adreno_gpu))
759                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
760                           (0x200 << 11 | 0x200 << 22));
761         else
762                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
763                           (0x400 << 11 | 0x300 << 22));
764
765         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
766                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
767
768         /*
769          * Disable the RB sampler datapath DP2 clock gating optimization
770          * for 1-SP GPUs, as it is enabled by default.
771          */
772         if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
773             adreno_is_a512(adreno_gpu))
774                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
775
776         /* Disable UCHE global filter as SP can invalidate/flush independently */
777         gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
778
779         /* Enable USE_RETENTION_FLOPS */
780         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
781
782         /* Enable ME/PFP split notification */
783         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
784
785         /*
786          *  In A5x, the CCU can send the context_done event of a particular
787          *  context to the UCHE, which ultimately reaches the CP, even while a
788          *  valid transaction of that context is still inside the CCU. This can
789          *  let the CP program config registers, which makes the "valid
790          *  transaction" inside the CCU be interpreted differently and can cause
791          *  a GPU fault. This bug is fixed in the latest A510 revision. To enable
792          *  the fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default
793          *  is 1, i.e. disabled). For older A510 revisions this bit is unused.
794          */
795         if (adreno_is_a510(adreno_gpu))
796                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
797
798         /* Enable HWCG */
799         a5xx_set_hwcg(gpu, true);
800
801         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
802
803         /* Set the highest bank bit */
804         if (adreno_is_a540(adreno_gpu))
805                 regbit = 2;
806         else
807                 regbit = 1;
808
809         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
810         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
811
812         if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
813             adreno_is_a540(adreno_gpu))
814                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
815
816         /* Disable All flat shading optimization (ALLFLATOPTDIS) */
817         gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
818
819         /* Protect registers from the CP */
820         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
821
822         /* RBBM */
823         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
824         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
825         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
826         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
827         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
828         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
829
830         /* Content protect */
831         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
832                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
833                         16));
834         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
835                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
836
837         /* CP */
838         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
839         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
840         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
841         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
842
843         /* RB */
844         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
845         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
846
847         /* VPC */
848         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
849         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
850
851         /* UCHE */
852         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
853
854         if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
855             adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
856             adreno_is_a530(adreno_gpu))
857                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
858                         ADRENO_PROTECT_RW(0x10000, 0x8000));
859
860         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
861         /*
862          * Disable the trusted memory range - we don't actually support secure
863          * memory rendering at this point in time and we don't want to block off
864          * part of the virtual memory space.
865          */
866         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
867                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
868         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
869
870         /* Put the GPU into 64 bit by default */
871         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
872         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
873         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
874         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
875         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
876         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
877         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
878         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
879         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
880         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
881         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
882         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
883
884         /*
885          * VPC corner case with local memory load kill leads to corrupt
886          * internal state. Normal Disable does not work for all a5x chips.
887          * So do the following setting to disable it.
888          */
889         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
890                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
891                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
892         }
893
894         ret = adreno_hw_init(gpu);
895         if (ret)
896                 return ret;
897
898         if (!(adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
899               adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu)))
900                 a5xx_gpmu_ucode_init(gpu);
901
902         ret = a5xx_ucode_init(gpu);
903         if (ret)
904                 return ret;
905
906         /* Set the ringbuffer address */
907         gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
908                 gpu->rb[0]->iova);
909
910         /*
911          * If the microcode supports the WHERE_AM_I opcode then we can use that
912          * in lieu of the RPTR shadow and enable preemption. Otherwise, we
913          * can't safely use the RPTR shadow or preemption. In either case, the
914          * RPTR shadow should be disabled in hardware.
915          */
916         gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
917                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
918
919         /* Create a privileged buffer for the RPTR shadow */
920         if (a5xx_gpu->has_whereami) {
921                 if (!a5xx_gpu->shadow_bo) {
922                         a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
923                                 sizeof(u32) * gpu->nr_rings,
924                                 MSM_BO_WC | MSM_BO_MAP_PRIV,
925                                 gpu->aspace, &a5xx_gpu->shadow_bo,
926                                 &a5xx_gpu->shadow_iova);
927
928                         if (IS_ERR(a5xx_gpu->shadow))
929                                 return PTR_ERR(a5xx_gpu->shadow);
930                 }
931
932                 gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
933                         REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
934         } else if (gpu->nr_rings > 1) {
935                 /* Disable preemption if WHERE_AM_I isn't available */
936                 a5xx_preempt_fini(gpu);
937                 gpu->nr_rings = 1;
938         }
939
940         a5xx_preempt_hw_init(gpu);
941
942         /* Set the interrupt mask - only the sources in A5XX_INT_MASK are enabled */
943         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
944
945         /* Clear ME_HALT to start the micro engine */
946         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
947         ret = a5xx_me_init(gpu);
948         if (ret)
949                 return ret;
950
951         ret = a5xx_power_init(gpu);
952         if (ret)
953                 return ret;
954
955         /*
956          * Send a pipeline event stat to get misbehaving counters to start
957          * ticking correctly
958          */
959         if (adreno_is_a530(adreno_gpu)) {
960                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
961                 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
962
963                 a5xx_flush(gpu, gpu->rb[0], true);
964                 if (!a5xx_idle(gpu, gpu->rb[0]))
965                         return -EINVAL;
966         }
967
968         /*
969          * If the chip we are using supports loading one, try to load a zap
970          * shader into the secure world. If successful we can use the CP to
971          * switch out of secure mode. If not, we have no recourse but to try
972          * to switch ourselves out manually. If we guessed wrong, access to
973          * the RBBM_SECVID_TRUST_CNTL register will be blocked and a
974          * permissions violation will soon follow.
975          */
976         ret = a5xx_zap_shader_init(gpu);
977         if (!ret) {
978                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
979                 OUT_RING(gpu->rb[0], 0x00000000);
980
981                 a5xx_flush(gpu, gpu->rb[0], true);
982                 if (!a5xx_idle(gpu, gpu->rb[0]))
983                         return -EINVAL;
984         } else if (ret == -ENODEV) {
985                 /*
986                  * This device does not use zap shader (but print a warning
987                  * just in case someone got their dt wrong.. hopefully they
988                  * have a debug UART to realize the error of their ways...
989                  * if you mess this up you are about to crash horribly)
990                  */
991                 dev_warn_once(gpu->dev->dev,
992                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
993                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
994         } else {
995                 return ret;
996         }
997
998         /* Last step - yield the ringbuffer */
999         a5xx_preempt_start(gpu);
1000
1001         return 0;
1002 }
1003
1004 static void a5xx_recover(struct msm_gpu *gpu)
1005 {
1006         int i;
1007
1008         adreno_dump_info(gpu);
1009
1010         for (i = 0; i < 8; i++) {
1011                 printk("CP_SCRATCH_REG%d: %u\n", i,
1012                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
1013         }
1014
1015         if (hang_debug)
1016                 a5xx_dump(gpu);
1017
1018         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
1019         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
1020         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
1021         adreno_recover(gpu);
1022 }
1023
1024 static void a5xx_destroy(struct msm_gpu *gpu)
1025 {
1026         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1027         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1028
1029         DBG("%s", gpu->name);
1030
1031         a5xx_preempt_fini(gpu);
1032
1033         if (a5xx_gpu->pm4_bo) {
1034                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
1035                 drm_gem_object_put(a5xx_gpu->pm4_bo);
1036         }
1037
1038         if (a5xx_gpu->pfp_bo) {
1039                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
1040                 drm_gem_object_put(a5xx_gpu->pfp_bo);
1041         }
1042
1043         if (a5xx_gpu->gpmu_bo) {
1044                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
1045                 drm_gem_object_put(a5xx_gpu->gpmu_bo);
1046         }
1047
1048         if (a5xx_gpu->shadow_bo) {
1049                 msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
1050                 drm_gem_object_put(a5xx_gpu->shadow_bo);
1051         }
1052
1053         adreno_gpu_cleanup(adreno_gpu);
1054         kfree(a5xx_gpu);
1055 }
1056
1057 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
1058 {
1059         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
1060                 return false;
1061
1062         /*
1063          * Nearly every abnormality ends up pausing the GPU and triggering a
1064          * fault so we can safely just watch for this one interrupt to fire
1065          */
1066         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
1067                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
1068 }
1069
1070 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1071 {
1072         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1073         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1074
1075         if (ring != a5xx_gpu->cur_ring) {
1076                 WARN(1, "Tried to idle a non-current ringbuffer\n");
1077                 return false;
1078         }
1079
1080         /* wait for CP to drain ringbuffer: */
1081         if (!adreno_idle(gpu, ring))
1082                 return false;
1083
1084         if (spin_until(_a5xx_check_idle(gpu))) {
1085                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
1086                         gpu->name, __builtin_return_address(0),
1087                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1088                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
1089                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1090                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
1091                 return false;
1092         }
1093
1094         return true;
1095 }
1096
1097 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1098 {
1099         struct msm_gpu *gpu = arg;
1100         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
1101                         iova, flags,
1102                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
1103                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
1104                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
1105                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
1106
1107         return 0;
1108 }
1109
1110 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
1111 {
1112         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
1113
1114         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
1115                 u32 val;
1116
1117                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
1118
1119                 /*
1120                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
1121                  * read it twice
1122                  */
1123
1124                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1125                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1126
1127                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
1128                         val);
1129         }
1130
1131         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
1132                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
1133                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
1134
1135         if (status & A5XX_CP_INT_CP_DMA_ERROR)
1136                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
1137
1138         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1139                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
1140
1141                 dev_err_ratelimited(gpu->dev->dev,
1142                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1143                         val & (1 << 24) ? "WRITE" : "READ",
1144                         (val & 0xFFFFF) >> 2, val);
1145         }
1146
1147         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
1148                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
1149                 const char *access[16] = { "reserved", "reserved",
1150                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
1151                         "", "", "me read", "me write", "", "", "crashdump read",
1152                         "crashdump write" };
1153
1154                 dev_err_ratelimited(gpu->dev->dev,
1155                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
1156                         status & 0xFFFFF, access[(status >> 24) & 0xF],
1157                         (status & (1 << 31)), status);
1158         }
1159 }
1160
1161 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1162 {
1163         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1164                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1165
1166                 dev_err_ratelimited(gpu->dev->dev,
1167                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1168                         val & (1 << 28) ? "WRITE" : "READ",
1169                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1170                         (val >> 24) & 0xF);
1171
1172                 /* Clear the error */
1173                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1174
1175                 /* Clear the interrupt */
1176                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1177                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1178         }
1179
1180         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1181                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1182
1183         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1184                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1185                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1186
1187         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1188                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1189                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1190
1191         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1192                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1193                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1194
1195         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1196                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1197
1198         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1199                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1200 }
1201
1202 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1203 {
1204         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI) << 32;
1205
1206         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1207
1208         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1209                 addr);
1210 }
1211
1212 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1213 {
1214         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1215 }
1216
1217 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1218 {
1219         struct drm_device *dev = gpu->dev;
1220         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1221
1222         /*
1223          * If stalled on SMMU fault, we could trip the GPU's hang detection,
1224          * but the fault handler will trigger the devcore dump, and we want
1225          * to otherwise resume normally rather than killing the submit, so
1226          * just bail.
1227          */
1228         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
1229                 return;
1230
1231         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1232                 ring ? ring->id : -1, ring ? ring->seqno : 0,
1233                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1234                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1235                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1236                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1237                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1238                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1239                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1240
1241         /* Turn off the hangcheck timer to keep it from bothering us */
1242         del_timer(&gpu->hangcheck_timer);
1243
1244         kthread_queue_work(gpu->worker, &gpu->recover_work);
1245 }
1246
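/* RBBM error interrupt bits that are decoded by a5xx_rbbm_err_irq() */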
1247 #define RBBM_ERROR_MASK \
1248         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1249         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1250         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1251         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1252         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1253         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1254
1255 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1256 {
1257         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1258
1259         /*
1260          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1261          * before the source is cleared the interrupt will storm.
1262          */
1263         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1264                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1265
1266         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1267         if (status & RBBM_ERROR_MASK)
1268                 a5xx_rbbm_err_irq(gpu, status);
1269
1270         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1271                 a5xx_cp_err_irq(gpu);
1272
1273         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1274                 a5xx_fault_detect_irq(gpu);
1275
1276         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1277                 a5xx_uche_err_irq(gpu);
1278
1279         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1280                 a5xx_gpmu_err_irq(gpu);
1281
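             /*
              * CACHE_FLUSH_TS interrupts are raised by the timestamp event
              * written at the end of each submit, so retire any completed
              * submits and give the preemption logic a chance to switch rings.
              */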
1282         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1283                 a5xx_preempt_trigger(gpu);
1284                 msm_gpu_retire(gpu);
1285         }
1286
1287         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1288                 a5xx_preempt_irq(gpu);
1289
1290         return IRQ_HANDLED;
1291 }
1292
1293 static const u32 a5xx_registers[] = {
1294         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1295         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1296         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1297         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1298         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1299         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1300         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1301         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1302         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1303         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1304         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1305         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1306         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1307         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1308         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1309         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1310         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1311         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1312         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1313         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1314         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1315         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1316         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1317         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1318         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1319         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1320         0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1321         0xAC60, 0xAC60, ~0,
1322 };
1323
1324 static void a5xx_dump(struct msm_gpu *gpu)
1325 {
1326         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1327                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1328         adreno_dump(gpu);
1329 }
1330
1331 static int a5xx_pm_resume(struct msm_gpu *gpu)
1332 {
1333         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1334         int ret;
1335
1336         /* Turn on the core power */
1337         ret = msm_gpu_pm_resume(gpu);
1338         if (ret)
1339                 return ret;
1340
1341         /* Adreno 508, 509, 510 and 512 need manual RBBM suspend/resume control */
1342         if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
1343                 /* Halt the sp_input_clk at HM level */
1344                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1345                 a5xx_set_hwcg(gpu, true);
1346                 /* Turn on sp_input_clk at HM level */
1347                 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1348                 return 0;
1349         }
1350
1351         /* Turn on the RBCCU power domain first to limit the chances of voltage droop */
1352         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1353
1354         /* Wait 3 usecs before polling */
1355         udelay(3);
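             /*
              * Bit 20 of the GPMU_*_PWR_CLK_STATUS registers appears to report
              * whether the corresponding GDSC has powered up (see the timeout
              * messages below)
              */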
1356
1357         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1358                 (1 << 20), (1 << 20));
1359         if (ret) {
1360                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1361                         gpu->name,
1362                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1363                 return ret;
1364         }
1365
1366         /* Turn on the SP domain */
1367         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1368         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1369                 (1 << 20), (1 << 20));
1370         if (ret)
1371                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1372                         gpu->name);
1373
1374         return ret;
1375 }
1376
1377 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1378 {
1379         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1380         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1381         u32 mask = 0xf;
1382         int i, ret;
1383
1384         /* A508, A510 have 3 XIN ports in VBIF */
1385         if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
1386                 mask = 0x7;
1387
1388         /* Clear the VBIF pipe before shutting down */
1389         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
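             /* Wait for all of the halted XIN ports to acknowledge the halt */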
1390         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1391                                 mask) == mask);
1392
1393         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1394
1395         /*
1396          * Reset the VBIF before power collapse to avoid an issue with FIFO
1397          * entries on Adreno A510 and A530 (on the other targets this reset
1398          * tends to lock up the GPU, so skip it)
1399          */
1399         if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
1400                 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1401                 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1402         }
1403
1404         ret = msm_gpu_pm_suspend(gpu);
1405         if (ret)
1406                 return ret;
1407
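             /*
              * Clear the cached WHERE_AM_I rptr shadow values so that stale
              * data isn't reported after resume
              */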
1408         if (a5xx_gpu->has_whereami)
1409                 for (i = 0; i < gpu->nr_rings; i++)
1410                         a5xx_gpu->shadow[i] = 0;
1411
1412         return 0;
1413 }
1414
1415 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1416 {
1417         *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
1418                 REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
1419
1420         return 0;
1421 }
1422
1423 struct a5xx_crashdumper {
1424         void *ptr;
1425         struct drm_gem_object *bo;
1426         u64 iova;
1427 };
1428
1429 struct a5xx_gpu_state {
1430         struct msm_gpu_state base;
1431         u32 *hlsqregs;
1432 };
1433
1434 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1435                 struct a5xx_crashdumper *dumper)
1436 {
1437         dumper->ptr = msm_gem_kernel_new(gpu->dev,
1438                 SZ_1M, MSM_BO_WC, gpu->aspace,
1439                 &dumper->bo, &dumper->iova);
1440
1441         if (!IS_ERR(dumper->ptr))
1442                 msm_gem_object_set_name(dumper->bo, "crashdump");
1443
1444         return PTR_ERR_OR_ZERO(dumper->ptr);
1445 }
1446
1447 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1448                 struct a5xx_crashdumper *dumper)
1449 {
1450         u32 val;
1451
1452         if (IS_ERR_OR_NULL(dumper->ptr))
1453                 return -EINVAL;
1454
1455         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1456                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1457
1458         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1459
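             /*
              * Poll (100us interval, 10ms timeout) for the dumper to set the
              * completion bit (bit 2) in CRASH_DUMP_CNTL
              */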
1460         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1461                 val & 0x04, 100, 10000);
1462 }
1463
1464 /*
1465  * This is the list of registers that need to be read through the HLSQ
1466  * aperture via the crashdumper, since they are not normally accessible
1467  * from the CPU on a secure platform.
1468  */
1469 static const struct {
1470         u32 type;
1471         u32 regoffset;
1472         u32 count;
1473 } a5xx_hlsq_aperture_regs[] = {
1474         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1475         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1476         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1477         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1478         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1479         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1480         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1481         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1482         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1483         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1484         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1485         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1486         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1487         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1488         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1489 };
1490
1491 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1492                 struct a5xx_gpu_state *a5xx_state)
1493 {
1494         struct a5xx_crashdumper dumper = { 0 };
1495         u32 offset, count = 0;
1496         u64 *ptr;
1497         int i;
1498
1499         if (a5xx_crashdumper_init(gpu, &dumper))
1500                 return;
1501
1502         /* The script will be written at offset 0 */
1503         ptr = dumper.ptr;
1504
1505         /* Start writing the data at offset 256k */
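             /* The BO is 1MB, which leaves up to 768KB for the dumped register values */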
1506         offset = dumper.iova + (256 * SZ_1K);
1507
1508         /* Count how many additional registers to get from the HLSQ aperture */
1509         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1510                 count += a5xx_hlsq_aperture_regs[i].count;
1511
1512         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1513         if (!a5xx_state->hlsqregs)
1514                 return;
1515
1516         /* Build the crashdump script */
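             /*
              * Each operation emitted below is a pair of 64-bit words: the
              * first holds the value to write (or the IOVA to dump to) and the
              * second puts the register offset in bits [63:44], sets bit 21 for
              * a register write, and carries the dword count in the low bits.
              */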
1517         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1518                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1519                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1520
1521                 /* Write the register to select the desired bank */
1522                 *ptr++ = ((u64) type << 8);
1523                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1524                         (1 << 21) | 1;
1525
1526                 *ptr++ = offset;
1527                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1528                         | c;
1529
1530                 offset += c * sizeof(u32);
1531         }
1532
1533         /* Write two zeros to close off the script */
1534         *ptr++ = 0;
1535         *ptr++ = 0;
1536
1537         if (a5xx_crashdumper_run(gpu, &dumper)) {
1538                 kfree(a5xx_state->hlsqregs);
1539                 msm_gem_kernel_put(dumper.bo, gpu->aspace);
1540                 return;
1541         }
1542
1543         /* Copy the data from the crashdumper to the state */
1544         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1545                 count * sizeof(u32));
1546
1547         msm_gem_kernel_put(dumper.bo, gpu->aspace);
1548 }
1549
1550 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1551 {
1552         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1553                         GFP_KERNEL);
1554         bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
1555
1556         if (!a5xx_state)
1557                 return ERR_PTR(-ENOMEM);
1558
1559         /* Temporarily disable hardware clock gating before reading the hw */
1560         a5xx_set_hwcg(gpu, false);
1561
1562         /* First get the generic state from the adreno core */
1563         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1564
1565         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1566
1567         /*
1568          * Get the HLSQ regs with the help of the crashdumper, but only if
1569          * we are not stalled in an iommu fault (in which case the crashdumper
1570          * would not have access to memory)
1571          */
1572         if (!stalled)
1573                 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1574
1575         a5xx_set_hwcg(gpu, true);
1576
1577         return &a5xx_state->base;
1578 }
1579
1580 static void a5xx_gpu_state_destroy(struct kref *kref)
1581 {
1582         struct msm_gpu_state *state = container_of(kref,
1583                 struct msm_gpu_state, ref);
1584         struct a5xx_gpu_state *a5xx_state = container_of(state,
1585                 struct a5xx_gpu_state, base);
1586
1587         kfree(a5xx_state->hlsqregs);
1588
1589         adreno_gpu_state_destroy(state);
1590         kfree(a5xx_state);
1591 }
1592
1593 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1594 {
1595         if (IS_ERR_OR_NULL(state))
1596                 return 1;
1597
1598         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1599 }
1600
1601
1602 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1603 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1604                       struct drm_printer *p)
1605 {
1606         int i, j;
1607         u32 pos = 0;
1608         struct a5xx_gpu_state *a5xx_state = container_of(state,
1609                 struct a5xx_gpu_state, base);
1610
1611         if (IS_ERR_OR_NULL(state))
1612                 return;
1613
1614         adreno_show(gpu, state, p);
1615
1616         /* Dump the additional a5xx HLSQ registers */
1617         if (!a5xx_state->hlsqregs)
1618                 return;
1619
1620         drm_printf(p, "registers-hlsq:\n");
1621
1622         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1623                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1624                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1625
1626                 for (j = 0; j < c; j++, pos++, o++) {
1627                         /*
1628                          * To keep the crashdump simple we pull the entire range
1629                          * for each register type, but not all of the registers
1630                          * in the range are valid. Fortunately the invalid
1631                          * registers stick out like a sore thumb with a value
1632                          * of 0xdeadbeef.
1633                          */
1634                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1635                                 continue;
1636
1637                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1638                                 o << 2, a5xx_state->hlsqregs[pos]);
1639                 }
1640         }
1641 }
1642 #endif
1643
1644 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1645 {
1646         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1647         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1648
1649         return a5xx_gpu->cur_ring;
1650 }
1651
1652 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1653 {
1654         u64 busy_cycles, busy_time;
1655
1656         /* Only read the GPU busy counter if the hardware is already active */
1657         if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1658                 return 0;
1659
1660         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1661                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1662
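             /* Convert the cycle delta into microseconds of busy time using the core clock rate */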
1663         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1664         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1665
1666         gpu->devfreq.busy_cycles = busy_cycles;
1667
1668         pm_runtime_put(&gpu->pdev->dev);
1669
1670         if (WARN_ON(busy_time > ~0LU))
1671                 return ~0LU;
1672
1673         return (unsigned long)busy_time;
1674 }
1675
1676 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1677 {
1678         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1679         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1680
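             /*
              * Prefer the rptr shadow kept up to date by CP_WHERE_AM_I,
              * otherwise read the register directly
              */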
1681         if (a5xx_gpu->has_whereami)
1682                 return a5xx_gpu->shadow[ring->id];
1683
1684         return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1685 }
1686
1687 static const struct adreno_gpu_funcs funcs = {
1688         .base = {
1689                 .get_param = adreno_get_param,
1690                 .hw_init = a5xx_hw_init,
1691                 .pm_suspend = a5xx_pm_suspend,
1692                 .pm_resume = a5xx_pm_resume,
1693                 .recover = a5xx_recover,
1694                 .submit = a5xx_submit,
1695                 .active_ring = a5xx_active_ring,
1696                 .irq = a5xx_irq,
1697                 .destroy = a5xx_destroy,
1698 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1699                 .show = a5xx_show,
1700 #endif
1701 #if defined(CONFIG_DEBUG_FS)
1702                 .debugfs_init = a5xx_debugfs_init,
1703 #endif
1704                 .gpu_busy = a5xx_gpu_busy,
1705                 .gpu_state_get = a5xx_gpu_state_get,
1706                 .gpu_state_put = a5xx_gpu_state_put,
1707                 .create_address_space = adreno_iommu_create_address_space,
1708                 .get_rptr = a5xx_get_rptr,
1709         },
1710         .get_timestamp = a5xx_get_timestamp,
1711 };
1712
1713 static void check_speed_bin(struct device *dev)
1714 {
1715         struct nvmem_cell *cell;
1716         u32 val;
1717
1718         /*
1719          * If the OPP table specifies an opp-supported-hw property then we have
1720          * to set something with dev_pm_opp_set_supported_hw() or the table
1721          * doesn't get populated. Pick an arbitrary value that should select
1722          * the default frequencies without conflicting with any actual speed
1723          * bins.
1724          */
1725         val = 0x80;
1726
1727         cell = nvmem_cell_get(dev, "speed_bin");
1728
1729         if (!IS_ERR(cell)) {
1730                 void *buf = nvmem_cell_read(cell, NULL);
1731
1732                 if (!IS_ERR(buf)) {
1733                         u8 bin = *((u8 *) buf);
1734
1735                         val = (1 << bin);
1736                         kfree(buf);
1737                 }
1738
1739                 nvmem_cell_put(cell);
1740         }
1741
1742         devm_pm_opp_set_supported_hw(dev, &val, 1);
1743 }
1744
1745 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1746 {
1747         struct msm_drm_private *priv = dev->dev_private;
1748         struct platform_device *pdev = priv->gpu_pdev;
1749         struct a5xx_gpu *a5xx_gpu = NULL;
1750         struct adreno_gpu *adreno_gpu;
1751         struct msm_gpu *gpu;
1752         int ret;
1753
1754         if (!pdev) {
1755                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1756                 return ERR_PTR(-ENXIO);
1757         }
1758
1759         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1760         if (!a5xx_gpu)
1761                 return ERR_PTR(-ENOMEM);
1762
1763         adreno_gpu = &a5xx_gpu->base;
1764         gpu = &adreno_gpu->base;
1765
1766         adreno_gpu->registers = a5xx_registers;
1767
1768         a5xx_gpu->lm_leakage = 0x4E001A;
1769
1770         check_speed_bin(&pdev->dev);
1771
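             /*
              * The final argument to adreno_gpu_init() is the ringbuffer count;
              * four rings are requested so the a5xx preemption code has
              * multiple rings to switch between.
              */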
1772         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1773         if (ret) {
1774                 a5xx_destroy(&(a5xx_gpu->base.base));
1775                 return ERR_PTR(ret);
1776         }
1777
1778         if (gpu->aspace)
1779                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1780
1781         /* Set up the preemption specific bits and pieces for each ringbuffer */
1782         a5xx_preempt_init(gpu);
1783
1784         return gpu;
1785 }