drm/msm/a6xx: add support for Adreno 660 GPU
[linux-2.6-microblaze.git]
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index d553f62..eacc6ef 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -149,7 +149,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 
        a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
 
-       get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+       get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
                rbmemptr_stats(ring, index, cpcycles_start));
 
        /*
@@ -157,7 +157,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
         * GPU registers so we need to add 0x1a800 to the register value on A630
         * to get the right value from PM4.
         */
-       get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+       get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
                rbmemptr_stats(ring, index, alwayson_start));
 
        /* Invalidate CCU depth and color */
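(Note: REG_A6XX_CP_ALWAYS_ON_COUNTER_LO appears to be the CP-side view of the same always-on timer, so once it is read directly the +0x1a800 GMU-offset trick described in the comment above no longer applies; that comment now only documents the old workaround.)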
@@ -185,9 +185,9 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
                }
        }
 
-       get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+       get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
                rbmemptr_stats(ring, index, cpcycles_end));
-       get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+       get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
                rbmemptr_stats(ring, index, alwayson_end));
 
        /* Write the fence to the scratch register */
@@ -206,8 +206,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
        OUT_RING(ring, submit->seqno);
 
        trace_msm_gpu_submit_flush(submit,
-               gmu_read64(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L,
-                       REG_A6XX_GMU_ALWAYS_ON_COUNTER_H));
+               gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
+                       REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
 
        a6xx_flush(gpu, ring);
 }
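For context, get_stats_counter() emits a CP_REG_TO_MEM packet so the CP itself snapshots a 64-bit counter into the ring's stats area at the submit boundaries. A sketch of the helper as it reads in a6xx_gpu.c of this era:

    static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
                    u64 iova)
    {
            /* Copy two consecutive dwords (the LO/HI counter halves) to a
             * 64-bit destination address.
             */
            OUT_PKT7(ring, CP_REG_TO_MEM, 3);
            OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
                    CP_REG_TO_MEM_0_CNT(2) |
                    CP_REG_TO_MEM_0_64B);
            OUT_RING(ring, lower_32_bits(iova));
            OUT_RING(ring, upper_32_bits(iova));
    }

The trace_msm_gpu_submit_flush() hunk makes the same move on the CPU side: gpu_read64() reads the LO/HI pair through the GPU aperture instead of gmu_read64() going through the GMU.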
@@ -427,6 +427,59 @@ const struct adreno_reglist a650_hwcg[] = {
        {},
 };
 
+const struct adreno_reglist a660_hwcg[] = {
+       {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+       {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+       {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+       {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+       {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+       {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+       {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+       {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+       {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+       {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+       {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+       {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+       {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+       {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+       {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+       {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+       {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+       {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+       {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+       {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+       {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
+       {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+       {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+       {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+       {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+       {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+       {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+       {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+       {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+       {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+       {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+       {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+       {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+       {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+       {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+       {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+       {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+       {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+       {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+       {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
+       {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+       {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+       {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+       {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
+       {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+       {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+       {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+       {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+       {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+       {},
+};
+
 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
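The new table is consumed by a6xx_set_hwcg(), whose opening lines appear just above: struct adreno_reglist is an { offset, value } pair and the empty {} entry terminates the list. The loop that applies it (unchanged by this patch) is roughly:

    const struct adreno_reglist *reg;
    unsigned int i;

    /* Write each clock-gating register with its tuned value when
     * enabling HWCG, or 0 when disabling; offset == 0 ends the table.
     */
    for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
            gpu_write(gpu, reg->offset, state ? reg->value : 0);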
@@ -462,6 +515,162 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
        gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
 }
 
+/* For a615, a616, a618, a619, a630, a640 and a680 */
+static const u32 a6xx_protect[] = {
+       A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+       A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+       A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+       A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+       A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+       A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+       A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+       A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+       A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+       A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+       A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+       A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+       A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+       A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+       A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+       A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+       A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+       A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
+       A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+       A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+       A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+       A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
+       A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+       A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+       A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
+       A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+       A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
+};
+
+/* These are for a620 and a650 */
+static const u32 a650_protect[] = {
+       A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+       A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+       A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+       A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+       A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+       A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+       A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+       A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+       A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+       A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+       A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+       A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+       A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+       A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+       A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+       A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+       A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
+       A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+       A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
+       A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+       A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+       A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+       A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
+       A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+       A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
+       A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+       A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
+       A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+       A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
+       A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
+       A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
+       A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
+};
+
+/* These are for a635 and a660 */
+static const u32 a660_protect[] = {
+       A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+       A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+       A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+       A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+       A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+       A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+       A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+       A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+       A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+       A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+       A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+       A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+       A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+       A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+       A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+       A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+       A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+       A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
+       A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+       A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
+       A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+       A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+       A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+       A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
+       A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+       A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
+       A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+       A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
+       A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
+       A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+       A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
+       A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
+       A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
+       A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
+       A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
+       A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
+};
+
+static void a6xx_set_cp_protect(struct msm_gpu *gpu)
+{
+       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+       const u32 *regs = a6xx_protect;
+       unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
+
+       BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
+       BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
+
+       if (adreno_is_a650(adreno_gpu)) {
+               regs = a650_protect;
+               count = ARRAY_SIZE(a650_protect);
+               count_max = 48;
+       } else if (adreno_is_a660(adreno_gpu)) {
+               regs = a660_protect;
+               count = ARRAY_SIZE(a660_protect);
+               count_max = 48;
+       }
+
+       /*
+        * Enable access protection to privileged registers, fault on an access
+        * protect violation and select the last span to protect from the start
+        * address all the way to the end of the register address space
+        */
+       gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
+
+       for (i = 0; i < count - 1; i++)
+               gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
+       /* program the last entry in the final slot, which gets "infinite" length */
+       gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
+}
+
 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
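About the three protect tables above: the A6XX_PROTECT_* helpers (from adreno_gpu.h) pack a base register offset and a span length into one CP_PROTECT dword, with bit 31 blocking reads as well as writes. A sketch of the encoding as defined at the time:

    /* _len is the span length minus one; spans in practice stay below
     * 0x2000, leaving bit 31 free for the no-read flag.
     */
    #define A6XX_PROTECT_RDONLY(_reg, _len) \
            ((((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF))
    #define A6XX_PROTECT_NORDWR(_reg, _len) \
            ((1 << 31) | (((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF))

    /* e.g. A6XX_PROTECT_NORDWR(0x00800, 0x0082) blocks reads and writes
     * for registers 0x00800..0x00882 inclusive.
     */

The "infinite range" notes come from BIT(3) in CP_PROTECT_CNTL rather than from the length field: the last programmed entry protects from its start address to the end of the register space. One small gap: a6xx_set_cp_protect() has BUILD_BUG_ONs for the first two tables only; a matching BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48) would be the natural companion check.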
@@ -477,7 +686,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
        if (adreno_is_a640(adreno_gpu))
                amsbc = 1;
 
-       if (adreno_is_a650(adreno_gpu)) {
+       if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
                /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
                lower_bit = 3;
                amsbc = 1;
@@ -489,7 +698,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
                rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
        gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
        gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
-               uavflagprd_inv >> 4 | lower_bit << 1);
+               uavflagprd_inv << 4 | lower_bit << 1);
        gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
 }
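The SP_NC_MODE_CNTL change is a real fix rather than rename fallout: the UAVFLAGPRD_INV field was shifted the wrong way and always lost. With the a650-family values (lower_bit = 3, and uavflagprd_inv = 2 set in the branch above, outside this hunk):

    /* old: (2 >> 4) | (3 << 1) == 0x00 | 0x06 == 0x06, field dropped */
    /* new: (2 << 4) | (3 << 1) == 0x20 | 0x06 == 0x26, bits [7:4] set */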
 
@@ -541,6 +750,11 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
         * Targets up to a640 (a618, a630 and a640) need to check for a
         * microcode version that is patched to support the whereami opcode or
         * one that is new enough to include it by default.
+        *
+        * a650 tier targets don't need whereami but still need to be
+        * equal to or newer than 0.95 for other security fixes
+        *
+        * a660 targets have all the critical security fixes from the start
         */
        if (adreno_is_a618(adreno_gpu) || adreno_is_a630(adreno_gpu) ||
                adreno_is_a640(adreno_gpu)) {
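For background, "whereami" refers to the CP_WHERE_AM_I packet: rather than the kernel trusting a user-visible RPTR shadow, the firmware reports the CP's current read pointer into a privileged buffer. A rough sketch of how a6xx_flush() (outside this diff) emits it:

    /* Ask the CP to write its read pointer into the privileged shadow
     * buffer; only done when the SQE firmware supports the opcode.
     */
    OUT_PKT7(ring, CP_WHERE_AM_I, 2);
    OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
    OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));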
@@ -564,27 +778,20 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
                DRM_DEV_ERROR(&gpu->pdev->dev,
                        "a630 SQE ucode is too old. Have version %x need at least %x\n",
                        buf[0] & 0xfff, 0x190);
-       }  else {
-               /*
-                * a650 tier targets don't need whereami but still need to be
-                * equal to or newer than 0.95 for other security fixes
-                */
-               if (adreno_is_a650(adreno_gpu)) {
-                       if ((buf[0] & 0xfff) >= 0x095) {
-                               ret = true;
-                               goto out;
-                       }
-
-                       DRM_DEV_ERROR(&gpu->pdev->dev,
-                               "a650 SQE ucode is too old. Have version %x need at least %x\n",
-                               buf[0] & 0xfff, 0x095);
+       } else if (adreno_is_a650(adreno_gpu)) {
+               if ((buf[0] & 0xfff) >= 0x095) {
+                       ret = true;
+                       goto out;
                }
 
-               /*
-                * When a660 is added those targets should return true here
-                * since those have all the critical security fixes built in
-                * from the start
-                */
+               DRM_DEV_ERROR(&gpu->pdev->dev,
+                       "a650 SQE ucode is too old. Have version %x need at least %x\n",
+                       buf[0] & 0xfff, 0x095);
+       } else if (adreno_is_a660(adreno_gpu)) {
+               ret = true;
+       } else {
+               DRM_DEV_ERROR(&gpu->pdev->dev,
+                       "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
        }
 out:
        msm_gem_put_vaddr(obj);
@@ -620,8 +827,8 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
                }
        }
 
-       gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO,
-               REG_A6XX_CP_SQE_INSTR_BASE_HI, a6xx_gpu->sqe_iova);
+       gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
+               REG_A6XX_CP_SQE_INSTR_BASE + 1, a6xx_gpu->sqe_iova);
 
        return 0;
 }
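The rename tracks the regenerated headers, which drop the _LO/_HI suffixes for 64-bit registers; the high half simply lives at the next dword, hence BASE + 1. gpu_write64() itself is just two 32-bit writes, essentially (per msm_gpu.h of this era):

    static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
    {
            /* LO/HI pairs are not guaranteed to be quad-word aligned, so
             * the halves are written separately instead of via writeq().
             */
            gpu_write(gpu, lo, lower_32_bits(val));
            gpu_write(gpu, hi, upper_32_bits(val));
    }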
@@ -690,7 +897,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
        a6xx_set_hwcg(gpu, true);
 
        /* VBIF/GBIF start*/
-       if (adreno_is_a640(adreno_gpu) || adreno_is_a650(adreno_gpu)) {
+       if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
                gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
                gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
                gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
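adreno_is_a650_family() widens the old adreno_is_a650() checks to the whole a650/a660 tier, which shares the GBIF setup, GMEM mapping behaviour and bicubic weight tables touched below. A minimal sketch of the helper, assuming the grouping this series adds to adreno_gpu.h (a635 would fold in the same way once it gains a revn entry):

    static inline int adreno_is_a650_family(struct adreno_gpu *gpu)
    {
            return adreno_is_a650(gpu) || adreno_is_a660(gpu);
    }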
@@ -715,7 +922,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
        gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
        gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
 
-       if (!adreno_is_a650(adreno_gpu)) {
+       if (!adreno_is_a650_family(adreno_gpu)) {
                /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
                gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
                        REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
@@ -728,22 +935,27 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
        gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
        gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
 
-       if (adreno_is_a640(adreno_gpu) || adreno_is_a650(adreno_gpu))
+       if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu))
                gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
        else
                gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
        gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
 
+       if (adreno_is_a660(adreno_gpu))
+               gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
+
        /* Setting the mem pool size */
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
 
-       /* Setting the primFifo thresholds default values */
-       if (adreno_is_a650(adreno_gpu))
-               gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300000);
+       /* Set the primFifo thresholds to their default values, and set
+        * the vccCacheSkipDis=1 bit (0x200) for A640 and newer
+        */
+       if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
+               gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
        else if (adreno_is_a640(adreno_gpu))
-               gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200000);
+               gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
        else
-               gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, (0x300 << 11));
+               gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);
 
        /* Set the AHB default slave response to "ERROR" */
        gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
@@ -752,7 +964,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
        gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
 
        /* Select CP0 to always count cycles */
-       gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
+       gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
 
        a6xx_set_ubwc_config(gpu);
 
@@ -763,7 +975,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
        gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
 
        /* Set weights for bicubic filtering */
-       if (adreno_is_a650(adreno_gpu)) {
+       if (adreno_is_a650_family(adreno_gpu)) {
                gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
                gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
                        0x3fe05ff4);
@@ -776,41 +988,14 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
        }
 
        /* Protect registers from the CP */
-       gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 0x00000003);
-
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(0),
-               A6XX_PROTECT_RDONLY(0x600, 0x51));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(1), A6XX_PROTECT_RW(0xae50, 0x2));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(2), A6XX_PROTECT_RW(0x9624, 0x13));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(3), A6XX_PROTECT_RW(0x8630, 0x8));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(4), A6XX_PROTECT_RW(0x9e70, 0x1));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(5), A6XX_PROTECT_RW(0x9e78, 0x187));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(6), A6XX_PROTECT_RW(0xf000, 0x810));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(7),
-               A6XX_PROTECT_RDONLY(0xfc00, 0x3));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(8), A6XX_PROTECT_RW(0x50e, 0x0));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(9), A6XX_PROTECT_RDONLY(0x50f, 0x0));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(10), A6XX_PROTECT_RW(0x510, 0x0));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(11),
-               A6XX_PROTECT_RDONLY(0x0, 0x4f9));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(12),
-               A6XX_PROTECT_RDONLY(0x501, 0xa));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(13),
-               A6XX_PROTECT_RDONLY(0x511, 0x44));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(14), A6XX_PROTECT_RW(0xe00, 0xe));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(15), A6XX_PROTECT_RW(0x8e00, 0x0));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(16), A6XX_PROTECT_RW(0x8e50, 0xf));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(17), A6XX_PROTECT_RW(0xbe02, 0x0));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(18),
-               A6XX_PROTECT_RW(0xbe20, 0x11f3));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(19), A6XX_PROTECT_RW(0x800, 0x82));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(20), A6XX_PROTECT_RW(0x8a0, 0x8));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(21), A6XX_PROTECT_RW(0x8ab, 0x19));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(22), A6XX_PROTECT_RW(0x900, 0x4d));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(23), A6XX_PROTECT_RW(0x98d, 0x76));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(24),
-                       A6XX_PROTECT_RDONLY(0x980, 0x4));
-       gpu_write(gpu, REG_A6XX_CP_PROTECT(25), A6XX_PROTECT_RW(0xa630, 0x0));
+       a6xx_set_cp_protect(gpu);
+
+       if (adreno_is_a660(adreno_gpu)) {
+               gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
+               gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
+               /* Set dualQ + disable afull for A660 GPU but not for A635 */
+               gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
+       }
 
        /* Enable expanded apriv for targets that support it */
        if (gpu->hw_apriv) {
@@ -852,7 +1037,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
                if (!a6xx_gpu->shadow_bo) {
                        a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev,
                                sizeof(u32) * gpu->nr_rings,
-                               MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
+                               MSM_BO_WC | MSM_BO_MAP_PRIV,
                                gpu->aspace, &a6xx_gpu->shadow_bo,
                                &a6xx_gpu->shadow_iova);
 
@@ -1153,10 +1338,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
 {
        struct device_node *phandle;
 
-       a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
-       if (IS_ERR(a6xx_gpu->llc_mmio))
-               return;
-
        /*
         * There is a different programming path for targets with an mmu500
         * attached, so detect if that is the case
@@ -1166,6 +1347,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
                of_device_is_compatible(phandle, "arm,mmu-500"));
        of_node_put(phandle);
 
+       if (a6xx_gpu->have_mmu500)
+               a6xx_gpu->llc_mmio = NULL;
+       else
+               a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+
        a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
        a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
 
@@ -1210,7 +1396,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
        if (ret)
                return ret;
 
-       if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
+       if (a6xx_gpu->shadow_bo)
                for (i = 0; i < gpu->nr_rings; i++)
                        a6xx_gpu->shadow[i] = 0;
 
@@ -1265,9 +1451,6 @@ static void a6xx_destroy(struct msm_gpu *gpu)
 
        adreno_gpu_cleanup(adreno_gpu);
 
-       if (a6xx_gpu->opp_table)
-               dev_pm_opp_put_supported_hw(a6xx_gpu->opp_table);
-
        kfree(a6xx_gpu);
 }
 
@@ -1400,7 +1583,6 @@ static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse)
 static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu,
                u32 revn)
 {
-       struct opp_table *opp_table;
        u32 supp_hw = UINT_MAX;
        u16 speedbin;
        int ret;
@@ -1423,11 +1605,10 @@ static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu,
        supp_hw = fuse_to_supp_hw(dev, revn, speedbin);
 
 done:
-       opp_table = dev_pm_opp_set_supported_hw(dev, &supp_hw, 1);
-       if (IS_ERR(opp_table))
-               return PTR_ERR(opp_table);
+       ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
+       if (ret)
+               return ret;
 
-       a6xx_gpu->opp_table = opp_table;
        return 0;
 }
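devm_pm_opp_set_supported_hw() is the device-managed variant: it returns 0 or a negative errno instead of an opp_table pointer, and the setting is dropped automatically when the device unbinds. That is what lets the opp_table field and the dev_pm_opp_put_supported_hw() call in a6xx_destroy() (removed earlier in this patch) go away.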
 
@@ -1487,7 +1668,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
         */
        info = adreno_info(config->rev);
 
-       if (info && info->revn == 650)
+       if (info && (info->revn == 650 || info->revn == 660))
                adreno_gpu->base.hw_apriv = true;
 
        a6xx_llc_slices_init(pdev, a6xx_gpu);