drm/amdgpu: indirect register access for nv12 sriov
author Peng Ju Zhou <PengJu.Zhou@amd.com>
Mon, 22 Mar 2021 07:18:01 +0000 (15:18 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Apr 2021 20:50:17 +0000 (16:50 -0400)
1. expand rlcg interface for gc & mmhub indirect access
2. add rlcg interface for no kiq

v2: squash in fix for gfx9 (Changfeng)

Signed-off-by: Peng Ju Zhou <PengJu.Zhou@amd.com>
Reviewed-by: Emily.Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/soc15_common.h

index 060d0ae..438e2f7 100644 (file)
@@ -490,7 +490,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
            adev->gfx.rlc.funcs &&
            adev->gfx.rlc.funcs->is_rlcg_access_range) {
                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-                       return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
+                       return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0);
        } else {
                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
        }
index aeaaae7..4fc2ce8 100644 (file)
@@ -127,7 +127,8 @@ struct amdgpu_rlc_funcs {
        void (*reset)(struct amdgpu_device *adev);
        void (*start)(struct amdgpu_device *adev);
        void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
-       void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+       void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag);
+       u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 flag);
        bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
index e8c090f..196d9d2 100644 (file)
 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid              0x2030
 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX     0
 
+#define GFX_RLCG_GC_WRITE_OLD  (0x8 << 28)
+#define GFX_RLCG_GC_WRITE      (0x0 << 28)
+#define GFX_RLCG_GC_READ       (0x1 << 28)
+#define GFX_RLCG_MMHUB_WRITE   (0x2 << 28)
+
 MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -1418,38 +1423,127 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
 };
 
-static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write)
+{
+       /* always programed by rlcg, only for gc */
+       if (offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI) ||
+           offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO) ||
+           offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH) ||
+           offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL) ||
+           offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX) ||
+           offset == SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)) {
+               if (!amdgpu_sriov_reg_indirect_gc(adev))
+                       *flag = GFX_RLCG_GC_WRITE_OLD;
+               else
+                       *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
+
+               return true;
+       }
+
+       /* currently support gc read/write, mmhub write */
+       if (offset >= SOC15_REG_OFFSET(GC, 0, mmSDMA0_DEC_START) &&
+           offset <= SOC15_REG_OFFSET(GC, 0, mmRLC_GTS_OFFSET_MSB)) {
+               if (amdgpu_sriov_reg_indirect_gc(adev))
+                       *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
+               else
+                       return false;
+       } else {
+               if (amdgpu_sriov_reg_indirect_mmhub(adev))
+                       *flag = GFX_RLCG_MMHUB_WRITE;
+               else
+                       return false;
+       }
+
+       return true;
+}
+
+static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)
 {
        static void *scratch_reg0;
        static void *scratch_reg1;
+       static void *scratch_reg2;
+       static void *scratch_reg3;
        static void *spare_int;
+       static uint32_t grbm_cntl;
+       static uint32_t grbm_idx;
        uint32_t i = 0;
        uint32_t retries = 50000;
+       u32 ret = 0;
+
+       scratch_reg0 = adev->rmmio +
+                      (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4;
+       scratch_reg1 = adev->rmmio +
+                      (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1) * 4;
+       scratch_reg2 = adev->rmmio +
+                      (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4;
+       scratch_reg3 = adev->rmmio +
+                      (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4;
+       spare_int = adev->rmmio +
+                   (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4;
+
+       grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
+       grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
+
+       if (offset == grbm_cntl || offset == grbm_idx) {
+               if (offset  == grbm_cntl)
+                       writel(v, scratch_reg2);
+               else if (offset == grbm_idx)
+                       writel(v, scratch_reg3);
+
+               writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+       } else {
+               writel(v, scratch_reg0);
+               writel(offset | flag, scratch_reg1);
+               writel(1, spare_int);
+               for (i = 0; i < retries; i++) {
+                       u32 tmp;
+
+                       tmp = readl(scratch_reg1);
+                       if (!(tmp & flag))
+                               break;
 
-       scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
-       scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-       spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+                       udelay(10);
+               }
 
-       if (amdgpu_sriov_runtime(adev)) {
-               pr_err("shouldn't call rlcg write register during runtime\n");
-               return;
+               if (i >= retries)
+                       pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
        }
 
-       writel(v, scratch_reg0);
-       writel(offset | 0x80000000, scratch_reg1);
-       writel(1, spare_int);
-       for (i = 0; i < retries; i++) {
-               u32 tmp;
+       ret = readl(scratch_reg0);
 
-               tmp = readl(scratch_reg1);
-               if (!(tmp & 0x80000000))
-                       break;
+       return ret;
+}
 
-               udelay(10);
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 flag)
+{
+       uint32_t rlcg_flag;
+
+       if (amdgpu_sriov_fullaccess(adev) &&
+           gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 1)) {
+               gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag);
+
+               return;
        }
+       if (flag & AMDGPU_REGS_NO_KIQ)
+               WREG32_NO_KIQ(offset, value);
+       else
+               WREG32(offset, value);
+}
+
+static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 flag)
+{
+       uint32_t rlcg_flag;
 
-       if (i >= retries)
-               pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+       if (amdgpu_sriov_fullaccess(adev) &&
+           gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 0))
+               return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag);
+
+       if (flag & AMDGPU_REGS_NO_KIQ)
+               return RREG32_NO_KIQ(offset);
+       else
+               return RREG32(offset);
+
+       return 0;
 }
 
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
@@ -7884,6 +7978,7 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
        .start = gfx_v10_0_rlc_start,
        .update_spm_vmid = gfx_v10_0_update_spm_vmid,
        .rlcg_wreg = gfx_v10_rlcg_wreg,
+       .rlcg_rreg = gfx_v10_rlcg_rreg,
        .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
 };
 
index 99f5843..2b7ab8f 100644 (file)
@@ -734,7 +734,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };
 
-static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+static void gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
 {
        static void *scratch_reg0;
        static void *scratch_reg1;
@@ -787,6 +787,20 @@ static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
 
 }
 
+static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
+{
+       if (amdgpu_sriov_fullaccess(adev)) {
+               gfx_v9_0_rlcg_rw(adev, offset, v, flag);
+
+               return;
+       }
+
+       if (flag & AMDGPU_REGS_NO_KIQ)
+               WREG32_NO_KIQ(offset, v);
+       else
+               WREG32(offset, v);
+}
+
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
index 8cdf5d1..14bd794 100644 (file)
 })
 
 #define WREG32_RLC(reg, value) \
-       do {                                                    \
-               if (amdgpu_sriov_fullaccess(adev)) {    \
-                       uint32_t i = 0; \
-                       uint32_t retries = 50000;       \
-                       uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0;   \
-                       uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1;   \
-                       uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT;  \
-                       WREG32(r0, value);      \
-                       WREG32(r1, (reg | 0x80000000)); \
-                       WREG32(spare_int, 0x1); \
-                       for (i = 0; i < retries; i++) { \
-                               u32 tmp = RREG32(r1);   \
-                               if (!(tmp & 0x80000000))        \
-                                       break;  \
-                               udelay(10);     \
-                       }       \
-                       if (i >= retries)       \
-                               pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg);     \
-               } else {        \
-                       WREG32(reg, value); \
-               }       \
+       do { \
+               if (adev->gfx.rlc.funcs->rlcg_wreg) \
+                       adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, 0); \
+               else \
+                       WREG32(reg, value);     \
        } while (0)
 
 #define WREG32_RLC_EX(prefix, reg, value) \
        } while (0)
 
 #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
-       do {                                                    \
-               uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
-               if (amdgpu_sriov_fullaccess(adev)) {    \
-                       uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2;   \
-                       uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3;   \
-                       uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;   \
-                       uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;   \
-                       if (target_reg == grbm_cntl) \
-                               WREG32(r2, value);      \
-                       else if (target_reg == grbm_idx) \
-                               WREG32(r3, value);      \
-                       WREG32(target_reg, value);      \
-               } else {        \
-                       WREG32(target_reg, value); \
-               }       \
+       WREG32_RLC((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value)
+
+#define RREG32_RLC(reg) \
+       (adev->gfx.rlc.funcs->rlcg_rreg ? \
+               adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, 0) : RREG32(reg))
+
+#define WREG32_RLC_NO_KIQ(reg, value) \
+       do { \
+               if (adev->gfx.rlc.funcs->rlcg_wreg) \
+                       adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, AMDGPU_REGS_NO_KIQ); \
+               else \
+                       WREG32_NO_KIQ(reg, value);      \
        } while (0)
 
+#define RREG32_RLC_NO_KIQ(reg) \
+       (adev->gfx.rlc.funcs->rlcg_rreg ? \
+               adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, AMDGPU_REGS_NO_KIQ) : RREG32_NO_KIQ(reg))
+
 #define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \
        do {                                                    \
                uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
                }       \
        } while (0)
 
+#define RREG32_SOC15_RLC(ip, inst, reg) \
+       RREG32_RLC(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+
 #define WREG32_SOC15_RLC(ip, inst, reg, value) \
        do {                                                    \
-                       uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
-                       WREG32_RLC(target_reg, value); \
+               uint32_t target_reg = adev->reg_offset[ip##_HWIP][0][reg##_BASE_IDX] + reg;\
+               WREG32_RLC(target_reg, value); \
        } while (0)
 
 #define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
        } while (0)
 
 #define WREG32_FIELD15_RLC(ip, idx, reg, field, val)   \
-    WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
-    (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
-    & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+       WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+       (RREG32_RLC(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
+       & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
 #define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
-    WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
+       WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
+
+#define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
+       RREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset))
 
 #endif