drm/amdgpu: set RAS fed status for more cases
author Tao Zhou <tao.zhou1@amd.com>
Thu, 6 Jun 2024 03:20:57 +0000 (11:20 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Jun 2024 20:18:26 +0000 (16:18 -0400)
Indicate fatal error for each RAS block and NBIO.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c

index 02d9ef9..68e9935 100644 (file)
@@ -2131,6 +2131,7 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
        /* Let IP handle its data, maybe we need get the output
         * from the callback to update the error type/count, etc
         */
+       amdgpu_ras_set_fed(obj->adev, true);
        ret = data->cb(obj->adev, &err_data, entry);
        /* ue will trigger an interrupt, and in that case
         * we need do a reset to recovery the whole system.
index 32cc60c..8d80df9 100644 (file)
@@ -414,6 +414,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
                /* ras_controller_int is dedicated for nbif ras error,
                 * not the global interrupt for sync flood
                 */
+               amdgpu_ras_set_fed(adev, true);
                amdgpu_ras_reset_gpu(adev);
        }