drm/amdgpu: Add common helper to reset ras error
author Hawking Zhang <Hawking.Zhang@amd.com>
Fri, 3 Feb 2023 08:10:37 +0000 (16:10 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 13:52:54 +0000 (09:52 -0400)
Add a common helper to reset the RAS error status. It
applies to IP blocks that follow the new RAS error
logging register design and that need a write of 0 to
reset the error status. For IP blocks that don't
support the new design, please still implement an
IP-specific helper.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index 57e86af..8a16a06 100644 (file)
@@ -3222,3 +3222,23 @@ void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
                }
        }
 }
+
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+                                          const struct amdgpu_ras_err_status_reg_entry *reg_list,
+                                          uint32_t reg_list_size,
+                                          uint32_t instance)
+{
+       uint32_t err_status_lo_offset, err_status_hi_offset;
+       uint32_t i;
+
+       for (i = 0; i < reg_list_size; i++) {
+               err_status_lo_offset =
+                       AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+                                                   reg_list[i].seg_lo, reg_list[i].reg_lo);
+               err_status_hi_offset =
+                       AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+                                                   reg_list[i].seg_hi, reg_list[i].reg_hi);
+               WREG32(err_status_lo_offset, 0);
+               WREG32(err_status_hi_offset, 0);
+       }
+}
index c820af7..e96333d 100644 (file)
@@ -750,4 +750,8 @@ void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
                                           uint32_t instance,
                                           uint32_t err_type,
                                           unsigned long *err_count);
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+                                          const struct amdgpu_ras_err_status_reg_entry *reg_list,
+                                          uint32_t reg_list_size, /* number of entries in reg_list */
+                                          uint32_t instance); /* instance used to resolve each register offset */
 #endif