mt76: mt7663: introduce coredump support
[linux-2.6-microblaze.git] / drivers / gpu / drm / msm / adreno / a3xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Red Hat
4  * Author: Rob Clark <robdclark@gmail.com>
5  *
6  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
7  */
8
9 #include "a3xx_gpu.h"
10
/*
 * Set of INT0 interrupt bits that a3xx_hw_init() writes to
 * REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (			\
	  A3XX_INT0_RBBM_AHB_ERROR		\
	| A3XX_INT0_RBBM_ATB_BUS_OVERFLOW	\
	| A3XX_INT0_CP_T0_PACKET_IN_IB		\
	| A3XX_INT0_CP_OPCODE_ERROR		\
	| A3XX_INT0_CP_RESERVED_BIT_ERROR	\
	| A3XX_INT0_CP_HW_FAULT			\
	| A3XX_INT0_CP_IB1_INT			\
	| A3XX_INT0_CP_IB2_INT			\
	| A3XX_INT0_CP_RB_INT			\
	| A3XX_INT0_CP_REG_PROTECT_FAULT	\
	| A3XX_INT0_CP_AHB_ERROR_HALT		\
	| A3XX_INT0_CACHE_FLUSH_TS		\
	| A3XX_INT0_UCHE_OOB_ACCESS)
25
26 extern bool hang_debug;
27
28 static void a3xx_dump(struct msm_gpu *gpu);
29 static bool a3xx_idle(struct msm_gpu *gpu);
30
31 static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
32 {
33         struct msm_drm_private *priv = gpu->dev->dev_private;
34         struct msm_ringbuffer *ring = submit->ring;
35         unsigned int i;
36
37         for (i = 0; i < submit->nr_cmds; i++) {
38                 switch (submit->cmd[i].type) {
39                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
40                         /* ignore IB-targets */
41                         break;
42                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
43                         /* ignore if there has not been a ctx switch: */
44                         if (priv->lastctx == submit->queue->ctx)
45                                 break;
46                         fallthrough;
47                 case MSM_SUBMIT_CMD_BUF:
48                         OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
49                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
50                         OUT_RING(ring, submit->cmd[i].size);
51                         OUT_PKT2(ring);
52                         break;
53                 }
54         }
55
56         OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
57         OUT_RING(ring, submit->seqno);
58
59         /* Flush HLSQ lazy updates to make sure there is nothing
60          * pending for indirect loads after the timestamp has
61          * passed:
62          */
63         OUT_PKT3(ring, CP_EVENT_WRITE, 1);
64         OUT_RING(ring, HLSQ_FLUSH);
65
66         /* wait for idle before cache flush/interrupt */
67         OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
68         OUT_RING(ring, 0x00000000);
69
70         /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
71         OUT_PKT3(ring, CP_EVENT_WRITE, 3);
72         OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
73         OUT_RING(ring, rbmemptr(ring, fence));
74         OUT_RING(ring, submit->seqno);
75
76 #if 0
77         /* Dummy set-constant to trigger context rollover */
78         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
79         OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
80         OUT_RING(ring, 0x00000000);
81 #endif
82
83         adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
84 }
85
86 static bool a3xx_me_init(struct msm_gpu *gpu)
87 {
88         struct msm_ringbuffer *ring = gpu->rb[0];
89
90         OUT_PKT3(ring, CP_ME_INIT, 17);
91         OUT_RING(ring, 0x000003f7);
92         OUT_RING(ring, 0x00000000);
93         OUT_RING(ring, 0x00000000);
94         OUT_RING(ring, 0x00000000);
95         OUT_RING(ring, 0x00000080);
96         OUT_RING(ring, 0x00000100);
97         OUT_RING(ring, 0x00000180);
98         OUT_RING(ring, 0x00006600);
99         OUT_RING(ring, 0x00000150);
100         OUT_RING(ring, 0x0000014e);
101         OUT_RING(ring, 0x00000154);
102         OUT_RING(ring, 0x00000001);
103         OUT_RING(ring, 0x00000000);
104         OUT_RING(ring, 0x00000000);
105         OUT_RING(ring, 0x00000000);
106         OUT_RING(ring, 0x00000000);
107         OUT_RING(ring, 0x00000000);
108
109         adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
110         return a3xx_idle(gpu);
111 }
112
113 static int a3xx_hw_init(struct msm_gpu *gpu)
114 {
115         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
116         struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
117         uint32_t *ptr, len;
118         int i, ret;
119
120         DBG("%s", gpu->name);
121
122         if (adreno_is_a305(adreno_gpu)) {
123                 /* Set up 16 deep read/write request queues: */
124                 gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
125                 gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
126                 gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
127                 gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
128                 gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
129                 gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
130                 gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
131                 /* Enable WR-REQ: */
132                 gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
133                 /* Set up round robin arbitration between both AXI ports: */
134                 gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
135                 /* Set up AOOO: */
136                 gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
137                 gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
138         } else if (adreno_is_a306(adreno_gpu)) {
139                 gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
140                 gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
141                 gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
142         } else if (adreno_is_a320(adreno_gpu)) {
143                 /* Set up 16 deep read/write request queues: */
144                 gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
145                 gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
146                 gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
147                 gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
148                 gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
149                 gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
150                 gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
151                 /* Enable WR-REQ: */
152                 gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
153                 /* Set up round robin arbitration between both AXI ports: */
154                 gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
155                 /* Set up AOOO: */
156                 gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
157                 gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
158                 /* Enable 1K sort: */
159                 gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
160                 gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
161
162         } else if (adreno_is_a330v2(adreno_gpu)) {
163                 /*
164                  * Most of the VBIF registers on 8974v2 have the correct
165                  * values at power on, so we won't modify those if we don't
166                  * need to
167                  */
168                 /* Enable 1k sort: */
169                 gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
170                 gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
171                 /* Enable WR-REQ: */
172                 gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
173                 gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
174                 /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
175                 gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
176
177         } else if (adreno_is_a330(adreno_gpu)) {
178                 /* Set up 16 deep read/write request queues: */
179                 gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
180                 gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
181                 gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
182                 gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
183                 gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
184                 gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
185                 gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
186                 /* Enable WR-REQ: */
187                 gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
188                 /* Set up round robin arbitration between both AXI ports: */
189                 gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
190                 /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
191                 gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
192                 /* Set up AOOO: */
193                 gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
194                 gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
195                 /* Enable 1K sort: */
196                 gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
197                 gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
198                 /* Disable VBIF clock gating. This is to enable AXI running
199                  * higher frequency than GPU:
200                  */
201                 gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
202
203         } else {
204                 BUG();
205         }
206
207         /* Make all blocks contribute to the GPU BUSY perf counter: */
208         gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
209
210         /* Tune the hystersis counters for SP and CP idle detection: */
211         gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
212         gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
213
214         /* Enable the RBBM error reporting bits.  This lets us get
215          * useful information on failure:
216          */
217         gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
218
219         /* Enable AHB error reporting: */
220         gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
221
222         /* Turn on the power counters: */
223         gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
224
225         /* Turn on hang detection - this spews a lot of useful information
226          * into the RBBM registers on a hang:
227          */
228         gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
229
230         /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
231         gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
232
233         /* Enable Clock gating: */
234         if (adreno_is_a306(adreno_gpu))
235                 gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
236         else if (adreno_is_a320(adreno_gpu))
237                 gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
238         else if (adreno_is_a330v2(adreno_gpu))
239                 gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
240         else if (adreno_is_a330(adreno_gpu))
241                 gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
242
243         if (adreno_is_a330v2(adreno_gpu))
244                 gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
245         else if (adreno_is_a330(adreno_gpu))
246                 gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
247
248         /* Set the OCMEM base address for A330, etc */
249         if (a3xx_gpu->ocmem.hdl) {
250                 gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
251                         (unsigned int)(a3xx_gpu->ocmem.base >> 14));
252         }
253
254         /* Turn on performance counters: */
255         gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
256
257         /* Enable the perfcntrs that we use.. */
258         for (i = 0; i < gpu->num_perfcntrs; i++) {
259                 const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
260                 gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
261         }
262
263         gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
264
265         ret = adreno_hw_init(gpu);
266         if (ret)
267                 return ret;
268
269         /*
270          * Use the default ringbuffer size and block size but disable the RPTR
271          * shadow
272          */
273         gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
274                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
275
276         /* Set the ringbuffer address */
277         gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
278
279         /* setup access protection: */
280         gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
281
282         /* RBBM registers */
283         gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
284         gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
285         gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
286         gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
287         gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
288         gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
289
290         /* CP registers */
291         gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
292         gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
293         gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
294         gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
295         gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
296
297         /* RB registers */
298         gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
299
300         /* VBIF registers */
301         gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
302
303         /* NOTE: PM4/micro-engine firmware registers look to be the same
304          * for a2xx and a3xx.. we could possibly push that part down to
305          * adreno_gpu base class.  Or push both PM4 and PFP but
306          * parameterize the pfp ucode addr/data registers..
307          */
308
309         /* Load PM4: */
310         ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
311         len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
312         DBG("loading PM4 ucode version: %x", ptr[1]);
313
314         gpu_write(gpu, REG_AXXX_CP_DEBUG,
315                         AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
316                         AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
317         gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
318         for (i = 1; i < len; i++)
319                 gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
320
321         /* Load PFP: */
322         ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
323         len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
324         DBG("loading PFP ucode version: %x", ptr[5]);
325
326         gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
327         for (i = 1; i < len; i++)
328                 gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
329
330         /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
331         if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
332                         adreno_is_a320(adreno_gpu)) {
333                 gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
334                                 AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
335                                 AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
336                                 AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
337         } else if (adreno_is_a330(adreno_gpu)) {
338                 /* NOTE: this (value take from downstream android driver)
339                  * includes some bits outside of the known bitfields.  But
340                  * A330 has this "MERCIU queue" thing too, which might
341                  * explain a new bitfield or reshuffling:
342                  */
343                 gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
344         }
345
346         /* clear ME_HALT to start micro engine */
347         gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
348
349         return a3xx_me_init(gpu) ? 0 : -EINVAL;
350 }
351
352 static void a3xx_recover(struct msm_gpu *gpu)
353 {
354         int i;
355
356         adreno_dump_info(gpu);
357
358         for (i = 0; i < 8; i++) {
359                 printk("CP_SCRATCH_REG%d: %u\n", i,
360                         gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
361         }
362
363         /* dump registers before resetting gpu, if enabled: */
364         if (hang_debug)
365                 a3xx_dump(gpu);
366
367         gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
368         gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
369         gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
370         adreno_recover(gpu);
371 }
372
373 static void a3xx_destroy(struct msm_gpu *gpu)
374 {
375         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
376         struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
377
378         DBG("%s", gpu->name);
379
380         adreno_gpu_cleanup(adreno_gpu);
381
382         adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
383
384         kfree(a3xx_gpu);
385 }
386
387 static bool a3xx_idle(struct msm_gpu *gpu)
388 {
389         /* wait for ringbuffer to drain: */
390         if (!adreno_idle(gpu, gpu->rb[0]))
391                 return false;
392
393         /* then wait for GPU to finish: */
394         if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
395                         A3XX_RBBM_STATUS_GPU_BUSY))) {
396                 DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
397
398                 /* TODO maybe we need to reset GPU here to recover from hang? */
399                 return false;
400         }
401
402         return true;
403 }
404
405 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
406 {
407         uint32_t status;
408
409         status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
410         DBG("%s: %08x", gpu->name, status);
411
412         // TODO
413
414         gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
415
416         msm_gpu_retire(gpu);
417
418         return IRQ_HANDLED;
419 }
420
/*
 * Register offset table installed as adreno_gpu->registers by
 * a3xx_gpu_init().  The list is terminated by a ~0 sentinel.
 *
 * NOTE(review): the entries look like (first, last) pairs describing
 * register ranges; the consumer is outside this file, so confirm the
 * pair/inclusive interpretation against the adreno core before relying
 * on it.
 */
421 static const unsigned int a3xx_registers[] = {
422         0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
423         0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
424         0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
425         0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
426         0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
427         0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
428         0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
429         0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
430         0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
431         0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
432         0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
433         0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
434         0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
435         0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
436         0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
437         0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
438         0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
439         0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
440         0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
441         0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
442         0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
443         0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
444         0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
445         0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
446         0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
447         0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
448         0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
449         0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
450         0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
451         0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
452         0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
453         0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
454         0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
455         0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
456         ~0   /* sentinel */
457 };
458
459 /* would be nice to not have to duplicate the _show() stuff with printk(): */
460 static void a3xx_dump(struct msm_gpu *gpu)
461 {
462         printk("status:   %08x\n",
463                         gpu_read(gpu, REG_A3XX_RBBM_STATUS));
464         adreno_dump(gpu);
465 }
466
467 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
468 {
469         struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
470
471         if (!state)
472                 return ERR_PTR(-ENOMEM);
473
474         adreno_gpu_state_get(gpu, state);
475
476         state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
477
478         return state;
479 }
480
481 static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
482 {
483         ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
484         return ring->memptrs->rptr;
485 }
486
487 static const struct adreno_gpu_funcs funcs = {
488         .base = {
489                 .get_param = adreno_get_param,
490                 .hw_init = a3xx_hw_init,
491                 .pm_suspend = msm_gpu_pm_suspend,
492                 .pm_resume = msm_gpu_pm_resume,
493                 .recover = a3xx_recover,
494                 .submit = a3xx_submit,
495                 .active_ring = adreno_active_ring,
496                 .irq = a3xx_irq,
497                 .destroy = a3xx_destroy,
498 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
499                 .show = adreno_show,
500 #endif
501                 .gpu_state_get = a3xx_gpu_state_get,
502                 .gpu_state_put = adreno_gpu_state_put,
503                 .create_address_space = adreno_iommu_create_address_space,
504                 .get_rptr = a3xx_get_rptr,
505         },
506 };
507
508 static const struct msm_gpu_perfcntr perfcntrs[] = {
509         { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
510                         SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
511         { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
512                         SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
513 };
514
515 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
516 {
517         struct a3xx_gpu *a3xx_gpu = NULL;
518         struct adreno_gpu *adreno_gpu;
519         struct msm_gpu *gpu;
520         struct msm_drm_private *priv = dev->dev_private;
521         struct platform_device *pdev = priv->gpu_pdev;
522         struct icc_path *ocmem_icc_path;
523         struct icc_path *icc_path;
524         int ret;
525
526         if (!pdev) {
527                 DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
528                 ret = -ENXIO;
529                 goto fail;
530         }
531
532         a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
533         if (!a3xx_gpu) {
534                 ret = -ENOMEM;
535                 goto fail;
536         }
537
538         adreno_gpu = &a3xx_gpu->base;
539         gpu = &adreno_gpu->base;
540
541         gpu->perfcntrs = perfcntrs;
542         gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
543
544         adreno_gpu->registers = a3xx_registers;
545
546         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
547         if (ret)
548                 goto fail;
549
550         /* if needed, allocate gmem: */
551         if (adreno_is_a330(adreno_gpu)) {
552                 ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
553                                             adreno_gpu, &a3xx_gpu->ocmem);
554                 if (ret)
555                         goto fail;
556         }
557
558         if (!gpu->aspace) {
559                 /* TODO we think it is possible to configure the GPU to
560                  * restrict access to VRAM carveout.  But the required
561                  * registers are unknown.  For now just bail out and
562                  * limp along with just modesetting.  If it turns out
563                  * to not be possible to restrict access, then we must
564                  * implement a cmdstream validator.
565                  */
566                 DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
567                 ret = -ENXIO;
568                 goto fail;
569         }
570
571         icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
572         ret = IS_ERR(icc_path);
573         if (ret)
574                 goto fail;
575
576         ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
577         ret = IS_ERR(ocmem_icc_path);
578         if (ret) {
579                 /* allow -ENODATA, ocmem icc is optional */
580                 if (ret != -ENODATA)
581                         goto fail;
582                 ocmem_icc_path = NULL;
583         }
584
585
586         /*
587          * Set the ICC path to maximum speed for now by multiplying the fastest
588          * frequency by the bus width (8). We'll want to scale this later on to
589          * improve battery life.
590          */
591         icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
592         icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
593
594         return gpu;
595
596 fail:
597         if (a3xx_gpu)
598                 a3xx_destroy(&a3xx_gpu->base.base);
599
600         return ERR_PTR(ret);
601 }