From 1f0d8e3781f40c6cae7cb68a4cccfc54dd4ad3a1 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 24 Mar 2021 11:36:33 -0400 Subject: [PATCH] drm/amdgpu: Reset RAS error count and status regs Reset the RAS error count and error status registers after reading to prevent over reporting error counts on Aldebaran. Signed-off-by: Mukul Joshi Reviewed-By: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7438d4e84776..b0d2fc9454ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -501,6 +501,12 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; + + if (obj->adev->asic_type == CHIP_ALDEBARAN) { + if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) + DRM_WARN("Failed to reset error counter and error status"); + } + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, "ce", info.ce_count); } -- 2.20.1