// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

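/*
 * RBBM interrupt sources that get unmasked at init time (written to
 * REG_A4XX_RBBM_INT_0_MASK in a4xx_hw_init()); anything not listed
 * here stays masked.
 */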
#define A4XX_INT0_MASK \
        (A4XX_INT0_RBBM_AHB_ERROR |        \
         A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
         A4XX_INT0_CP_T0_PACKET_IN_IB |    \
         A4XX_INT0_CP_OPCODE_ERROR |       \
         A4XX_INT0_CP_RESERVED_BIT_ERROR | \
         A4XX_INT0_CP_HW_FAULT |           \
         A4XX_INT0_CP_IB1_INT |            \
         A4XX_INT0_CP_IB2_INT |            \
         A4XX_INT0_CP_RB_INT |             \
         A4XX_INT0_CP_REG_PROTECT_FAULT |  \
         A4XX_INT0_CP_AHB_ERROR_HALT |     \
         A4XX_INT0_CACHE_FLUSH_TS |        \
         A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
        struct msm_drm_private *priv = gpu->dev->dev_private;
        struct msm_ringbuffer *ring = submit->ring;
        unsigned int i;

        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        /* ignore IB-targets */
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        /* ignore if there has not been a ctx switch: */
                        if (priv->lastctx == submit->queue->ctx)
                                break;
                        fallthrough;
                case MSM_SUBMIT_CMD_BUF:
                        OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, submit->cmd[i].size);
                        OUT_PKT2(ring);
                        break;
                }
        }

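        /*
         * Stash the seqno in a CP scratch register as well, so that
         * a4xx_recover() can report the last submit the CP saw when it
         * dumps the scratch registers after a hang.
         */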
        OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
        OUT_RING(ring, submit->seqno);

        /* Flush HLSQ lazy updates to make sure there is nothing
         * pending for indirect loads after the timestamp has
         * passed:
         */
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);

        /* wait for idle before cache flush/interrupt */
        OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
        OUT_RING(ring, 0x00000000);

        /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
        OUT_PKT3(ring, CP_EVENT_WRITE, 3);
        OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
        OUT_RING(ring, rbmemptr(ring, fence));
        OUT_RING(ring, submit->seqno);

        adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        unsigned int i;

        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

        /* Disable L1 clocking in A420 due to CCU issues with it */
        for (i = 0; i < 4; i++) {
                if (adreno_is_a420(adreno_gpu)) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
                                        0x00002020);
                } else {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
                                        0x00022020);
                }
        }

        /* No CCU for A405 */
        if (!adreno_is_a405(adreno_gpu)) {
                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
                                        0x00000922);
                }

                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
                                        0x00000000);
                }

                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
                                        0x00000001);
                }
        }

        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);

        /* Early A430s have a timing issue with SP/TP power collapse;
         * disabling HW clock gating prevents it.
         */
        if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
        else
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}
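/*
 * Prime the micro engine with CP_ME_INIT.  The 17-dword payload below
 * mirrors what the downstream vendor driver sends; the individual
 * fields are not documented, so treat their meaning as inherited
 * rather than derived.
 */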
static bool a4xx_me_init(struct msm_gpu *gpu)
{
        struct msm_ringbuffer *ring = gpu->rb[0];

        OUT_PKT3(ring, CP_ME_INIT, 17);
        OUT_RING(ring, 0x000003f7);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000080);
        OUT_RING(ring, 0x00000100);
        OUT_RING(ring, 0x00000180);
        OUT_RING(ring, 0x00006600);
        OUT_RING(ring, 0x00000150);
        OUT_RING(ring, 0x0000014e);
        OUT_RING(ring, 0x00000154);
        OUT_RING(ring, 0x00000001);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);

        adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
        return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
        uint32_t *ptr, len;
        int i, ret;

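        /*
         * Per-variant VBIF (bus interface) tuning.  The magic values come
         * from the downstream vendor driver; the BUG() below guards
         * against running on an a4xx variant we have no settings for.
         */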
        if (adreno_is_a405(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else if (adreno_is_a420(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
                gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
                gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else if (adreno_is_a430(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else {
                BUG();
        }

        /* Make all blocks contribute to the GPU BUSY perf counter */
        gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

        /* Tune the hysteresis counters for SP and CP idle detection */
        gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
        gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

        if (adreno_is_a430(adreno_gpu))
                gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

        /* Enable the RBBM error reporting bits */
        gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

        /* Enable AHB error reporting */
        gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

        /* Enable power counters */
        gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

        /*
         * Turn on hang detection - this spews a lot of useful information
         * into the RBBM registers on a hang:
         */
        gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
                        (1 << 30) | 0xFFFF);

        gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
                        (unsigned int)(a4xx_gpu->ocmem.base >> 14));

        /* Turn on performance counters: */
        gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

        /* Use the first CP counter for timestamp queries; userspace may set
         * this as well, but it selects the same counter/countable:
         */
        gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

        if (adreno_is_a430(adreno_gpu))
                gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

        /* Disable L2 bypass to avoid UCHE out of bounds errors */
        gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
        gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

        gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
                        (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

        /* On A430 enable SP regfile sleep for power savings */
        /* TODO downstream does this for !420, so maybe applies for 405 too? */
        if (!adreno_is_a420(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
                        0x00000441);
                gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
                        0x00000441);
        }

        a4xx_enable_hwcg(gpu);

        /*
         * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
         * due to timing issue with HLSQ_TP_CLK_EN
         */
        if (adreno_is_a420(adreno_gpu)) {
                unsigned int val;

                val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
                val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
                val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
        }

        /* setup access protection: */
        gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

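        /*
         * Each CP_PROTECT entry packs a base register offset and a
         * power-of-two register count into a single dword, with the high
         * bits selecting how accesses are trapped.  The encoding below is
         * inherited from the downstream driver; roughly:
         *
         *   entry = trap_bits << 29 | ilog2(nr_regs) << 24 | base_reg;
         */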
        /* RBBM registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

        /* CP registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

        /* RB registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

        /* HLSQ registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

        /* VPC registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

        /* SMMU registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

        gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

        ret = adreno_hw_init(gpu);
        if (ret)
                return ret;

        /*
         * Use the default ringbuffer size and block size but disable the RPTR
         * shadow
         */
        gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
                MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

        /* Set the ringbuffer address */
        gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

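        /*
         * Load the PM4 and PFP microcode into the CP's instruction RAMs
         * one dword at a time.  The loops start at index 1 because dword 0
         * of each firmware image is the version header printed below, not
         * part of the microcode itself.
         */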
        /* Load PM4: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
        len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
        DBG("loading PM4 ucode version: %u", ptr[0]);
        gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

        /* Load PFP: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
        len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
        DBG("loading PFP ucode version: %u", ptr[0]);

        gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

        /* clear ME_HALT to start micro engine */
        gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

        return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
        int i;

        adreno_dump_info(gpu);

        for (i = 0; i < 8; i++) {
                printk("CP_SCRATCH_REG%d: %u\n", i,
                        gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
        }

        /* dump registers before resetting gpu, if enabled: */
        if (hang_debug)
                a4xx_dump(gpu);

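        /*
         * Toggle a RBBM soft reset.  The read back in between is
         * presumably there to make sure the assert write has posted
         * before the reset is released.
         */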
        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
        gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
        adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

        DBG("%s", gpu->name);

        adreno_gpu_cleanup(adreno_gpu);

        adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

        kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
        /* wait for ringbuffer to drain: */
        if (!adreno_idle(gpu, gpu->rb[0]))
                return false;

        /* then wait for GPU to finish: */
        if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
                                        A4XX_RBBM_STATUS_GPU_BUSY))) {
                DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
                /* TODO maybe we need to reset GPU here to recover from hang? */
                return false;
        }

        return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
        uint32_t status;

        status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
        DBG("%s: Int status %08x", gpu->name, status);

        if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
                uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

                printk("CP | Protected mode error | %s | addr=%x\n",
                        reg & (1 << 24) ? "WRITE" : "READ",
                        (reg & 0xFFFFF) >> 2);
        }

        gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

        msm_gpu_retire(gpu);

        return IRQ_HANDLED;
}

static const unsigned int a4xx_registers[] = {
        /* RBBM */
        0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
        0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
        0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
        /* CP */
        0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
        0x0578, 0x058F,
        /* VSC */
        0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
        /* GRAS */
        0x0C80, 0x0C81, 0x0C88, 0x0C8F,
        /* RB */
        0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
        /* PC */
        0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
        /* VFD */
        0x0E40, 0x0E4A,
        /* VPC */
        0x0E60, 0x0E61, 0x0E63, 0x0E68,
        /* UCHE */
        0x0E80, 0x0E84, 0x0E88, 0x0E95,
        /* VMIDMT */
        0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
        0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
        0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
        0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
        0x1380, 0x1380,
        /* GRAS CTX 0 */
        0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
        /* PC CTX 0 */
        0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
        /* VFD CTX 0 */
        0x2200, 0x2204, 0x2208, 0x22A9,
        /* GRAS CTX 1 */
        0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
        /* PC CTX 1 */
        0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
        /* VFD CTX 1 */
        0x2600, 0x2604, 0x2608, 0x26A9,
        /* XPU */
        0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
        0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
        0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
        /* VBIF */
        0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
        0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
        0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
        0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
        0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
        0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
        0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
        0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
        0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
        0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
        0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
        0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
        0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
        0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
        0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
        0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
        0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
        0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
        0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
        0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
        0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
        0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
        0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
        0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
        0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
        0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
        0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
        0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
        0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
        0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
        0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
        0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
        0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
        0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
        ~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
        /* RBBM */
        0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
        0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
        0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
        /* CP */
        0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
        0x0578, 0x058F,
        /* VSC */
        0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
        /* GRAS */
        0x0C80, 0x0C81, 0x0C88, 0x0C8F,
        /* RB */
        0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
        /* PC */
        0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
        /* VFD */
        0x0E40, 0x0E4A,
        /* VPC */
        0x0E60, 0x0E61, 0x0E63, 0x0E68,
        /* UCHE */
        0x0E80, 0x0E84, 0x0E88, 0x0E95,
        /* GRAS CTX 0 */
        0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
        /* PC CTX 0 */
        0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
        /* VFD CTX 0 */
        0x2200, 0x2204, 0x2208, 0x22A9,
        /* GRAS CTX 1 */
        0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
        /* PC CTX 1 */
        0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
        /* VFD CTX 1 */
        0x2600, 0x2604, 0x2608, 0x26A9,
        /* VBIF version 0x20050000 */
        0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
        0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
        0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
        0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
        0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
        0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
        0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
        0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
        ~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

        if (!state)
                return ERR_PTR(-ENOMEM);

        adreno_gpu_state_get(gpu, state);

        state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

        return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
        printk("status:   %08x\n",
                        gpu_read(gpu, REG_A4XX_RBBM_STATUS));
        adreno_dump(gpu);
}

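/*
 * On A430 the SP/TP power domains can be collapsed via
 * REG_A4XX_RBBM_POWER_CNTL_IP (the SW_COLLAPSE bit); resume has to
 * release SW_COLLAPSE and then wait for the rail to report back on
 * before the core is touched.
 */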
static int a4xx_pm_resume(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        ret = msm_gpu_pm_resume(gpu);
        if (ret)
                return ret;

        if (adreno_is_a430(adreno_gpu)) {
                unsigned int reg;

                /* Set the default register values; set SW_COLLAPSE to 0 */
                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
                do {
                        udelay(5);
                        reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
                } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
        }

        return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        ret = msm_gpu_pm_suspend(gpu);
        if (ret)
                return ret;

        if (adreno_is_a430(adreno_gpu)) {
                /* Set the default register values; set SW_COLLAPSE to 1 */
                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
        }

        return 0;
}

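/*
 * Reads the CP cycle counter that a4xx_hw_init() pointed at
 * CP_ALWAYS_COUNT, giving a free-running timestamp.
 */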
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
        *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
                REG_A4XX_RBBM_PERFCTR_CP_0_HI);

        return 0;
}

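/*
 * The RPTR shadow is disabled in a4xx_hw_init() (CP_RB_CNTL_NO_UPDATE),
 * so the read pointer has to be sampled from the register directly.
 */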
static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
        ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
        return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
                .hw_init = a4xx_hw_init,
                .pm_suspend = a4xx_pm_suspend,
                .pm_resume = a4xx_pm_resume,
                .recover = a4xx_recover,
                .submit = a4xx_submit,
                .active_ring = adreno_active_ring,
                .irq = a4xx_irq,
                .destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
                .show = adreno_show,
#endif
                .gpu_state_get = a4xx_gpu_state_get,
                .gpu_state_put = adreno_gpu_state_put,
                .create_address_space = adreno_iommu_create_address_space,
                .get_rptr = a4xx_get_rptr,
        },
        .get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
        struct a4xx_gpu *a4xx_gpu = NULL;
        struct adreno_gpu *adreno_gpu;
        struct msm_gpu *gpu;
        struct msm_drm_private *priv = dev->dev_private;
        struct platform_device *pdev = priv->gpu_pdev;
        struct icc_path *ocmem_icc_path;
        struct icc_path *icc_path;
        int ret;

        if (!pdev) {
                DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
                ret = -ENXIO;
                goto fail;
        }

        a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
        if (!a4xx_gpu) {
                ret = -ENOMEM;
                goto fail;
        }

        adreno_gpu = &a4xx_gpu->base;
        gpu = &adreno_gpu->base;

        gpu->perfcntrs = NULL;
        gpu->num_perfcntrs = 0;

        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
        if (ret)
                goto fail;

        adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
                                                             a4xx_registers;

        /* if needed, allocate gmem: */
        ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
                                    &a4xx_gpu->ocmem);
        if (ret)
                goto fail;

        if (!gpu->aspace) {
                /* TODO we think it is possible to configure the GPU to
                 * restrict access to VRAM carveout.  But the required
                 * registers are unknown.  For now just bail out and
                 * limp along with just modesetting.  If it turns out
                 * to not be possible to restrict access, then we must
                 * implement a cmdstream validator.
                 */
                DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
                if (!allow_vram_carveout) {
                        ret = -ENXIO;
                        goto fail;
                }
        }

        icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
        if (IS_ERR(icc_path)) {
                ret = PTR_ERR(icc_path);
                goto fail;
        }

        ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
        if (IS_ERR(ocmem_icc_path)) {
                ret = PTR_ERR(ocmem_icc_path);
                /* allow -ENODATA, ocmem icc is optional */
                if (ret != -ENODATA)
                        goto fail;
                ocmem_icc_path = NULL;
        }

        /*
         * Set the ICC path to maximum speed for now by multiplying the fastest
         * frequency by the bus width (8). We'll want to scale this later on to
         * improve battery life.
         */
        icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
        icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

        return gpu;

fail:
        if (a4xx_gpu)
                a4xx_destroy(&a4xx_gpu->base.base);

        return ERR_PTR(ret);
}