drm/amdgpu: Don't query CE and UE errors
authorLuben Tuikov <luben.tuikov@amd.com>
Wed, 12 May 2021 16:33:23 +0000 (12:33 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Thu, 27 May 2021 16:22:19 +0000 (12:22 -0400)
On QUERY2 IOCTL don't query counts of correctable
and uncorrectable errors, since when RAS is
enabled and supported on Vega20 server boards,
this takes insurmountably long time, in O(n^3),
which slows the system down to the point of it
being unusable when we have GUI up.

Fixes: ae363a212b14 ("drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2")
Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fc83445..bb0cfe8 100644 (file)
@@ -337,7 +337,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 {
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
-       unsigned long ras_counter;
 
        if (!fpriv)
                return -EINVAL;
@@ -362,21 +361,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
        if (atomic_read(&ctx->guilty))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
-       /*query ue count*/
-       ras_counter = amdgpu_ras_query_error_count(adev, false);
-       /*ras counter is monotonic increasing*/
-       if (ras_counter != ctx->ras_counter_ue) {
-               out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
-               ctx->ras_counter_ue = ras_counter;
-       }
-
-       /*query ce count*/
-       ras_counter = amdgpu_ras_query_error_count(adev, true);
-       if (ras_counter != ctx->ras_counter_ce) {
-               out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
-               ctx->ras_counter_ce = ras_counter;
-       }
-
        mutex_unlock(&mgr->lock);
        return 0;
 }