drm/amdgpu: VCN 3.0 multiple queue ring reset
authorSonny Jiang <sonny.jiang@amd.com>
Fri, 27 Nov 2020 22:15:18 +0000 (17:15 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 9 Dec 2020 04:01:52 +0000 (23:01 -0500)
Add firmware write/read point reset sync through shared memory, port from vcn2.5.

Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c

index 4f718ee..def5839 100644 (file)
@@ -237,7 +237,8 @@ static int vcn_v3_0_sw_init(void *handle)
                }
 
                fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
-               fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG);
+               fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
+                                            cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
                fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
        }
 
@@ -935,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
 
 static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
 {
+       volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
        struct amdgpu_ring *ring;
        uint32_t rb_bufsz, tmp;
 
@@ -1048,6 +1050,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
        WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
                UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
                ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+       fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
 
        /* set the write pointer delay */
        WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
@@ -1071,6 +1074,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
        WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
                lower_32_bits(ring->wptr));
 
+       fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
        /* Unstall DPG */
        WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
                0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
@@ -1080,6 +1084,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 
 static int vcn_v3_0_start(struct amdgpu_device *adev)
 {
+       volatile struct amdgpu_fw_shared *fw_shared;
        struct amdgpu_ring *ring;
        uint32_t rb_bufsz, tmp;
        int i, j, k, r;
@@ -1222,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
                tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
                WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
 
+               fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+               fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+
                /* programm the RB_BASE for ring buffer */
                WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
                        lower_32_bits(ring->gpu_addr));
@@ -1234,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
                ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
                WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
                        lower_32_bits(ring->wptr));
+               fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+               fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
                ring = &adev->vcn.inst[i].ring_enc[0];
                WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
                WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
                WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
                WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+               fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
+               fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
                ring = &adev->vcn.inst[i].ring_enc[1];
                WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
                WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
                WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
                WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+               fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
        }
 
        return 0;
@@ -1595,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev)
 static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
                   int inst_idx, struct dpg_pause_state *new_state)
 {
+       volatile struct amdgpu_fw_shared *fw_shared;
        struct amdgpu_ring *ring;
        uint32_t reg_data = 0;
        int ret_code;
@@ -1626,6 +1641,8 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
                                        ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
                                /* Restore */
+                               fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+                               fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
                                ring = &adev->vcn.inst[inst_idx].ring_enc[0];
                                ring->wptr = 0;
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
@@ -1633,7 +1650,9 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+                               fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
+                               fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
                                ring = &adev->vcn.inst[inst_idx].ring_enc[1];
                                ring->wptr = 0;
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
@@ -1641,6 +1660,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+                               fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
                                /* Unstall DPG */
                                WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),