Revert "drm/amdgpu: let mode2 reset fallback to default when failure"
author Victor Zhao <Victor.Zhao@amd.com>
Thu, 13 Oct 2022 03:06:33 +0000 (11:06 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 19 Oct 2022 02:08:33 +0000 (22:08 -0400)
This reverts commit dac6b80818ac2353631c5a33d140d8d5508e2957.

This commit reverts the AMDGPU_SKIP_MODE2_RESET flag, as it conflicts
with the original design of the reset handler. It will be redesigned.

Fixes: dac6b80818ac23 ("drm/amdgpu: let mode2 reset fallback to default when failure")
Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c

index 03bbfaa..0561812 100644 (file)
@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
        reset_context.method = AMD_RESET_METHOD_NONE;
        reset_context.reset_req_dev = adev;
        clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-       clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
        amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
index ab8f970..bb73fb4 100644 (file)
@@ -5210,7 +5210,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
        reset_context->job = job;
        reset_context->hive = hive;
-
        /*
         * Build list of devices to reset.
         * In case we are in XGMI hive mode, resort the device list
@@ -5337,11 +5336,8 @@ retry:   /* Rest of adevs pre asic reset from XGMI hive. */
                        amdgpu_ras_resume(adev);
        } else {
                r = amdgpu_do_asic_reset(device_list_handle, reset_context);
-               if (r && r == -EAGAIN) {
-                       set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
-                       adev->asic_reset_res = 0;
+               if (r && r == -EAGAIN)
                        goto retry;
-               }
 
                if (!r && gpu_reset_for_dev_remove)
                        goto recover_end;
@@ -5777,7 +5773,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
        reset_context.reset_req_dev = adev;
        set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
        set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
-       set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
        adev->no_hw_access = true;
        r = amdgpu_device_pre_asic_reset(adev, &reset_context);
index 46c9933..cd968e7 100644 (file)
@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
                if (r)
index 2dad7aa..75f1402 100644 (file)
@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
        }
index 831fb22..f778466 100644 (file)
@@ -74,9 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
 {
        struct amdgpu_reset_handler *reset_handler = NULL;
 
-       if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
-               return -ENOSYS;
-
        if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
                reset_handler = adev->reset_cntl->get_reset_handler(
                        adev->reset_cntl, reset_context);
@@ -93,9 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
        int ret;
        struct amdgpu_reset_handler *reset_handler = NULL;
 
-       if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
-               return -ENOSYS;
-
        if (adev->reset_cntl)
                reset_handler = adev->reset_cntl->get_reset_handler(
                        adev->reset_cntl, reset_context);
index f5318fe..f4a501f 100644 (file)
@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {
 
        AMDGPU_NEED_FULL_RESET = 0,
        AMDGPU_SKIP_HW_RESET = 1,
-       AMDGPU_SKIP_MODE2_RESET = 2,
-       AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
+       AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
 };
 
 struct amdgpu_reset_context {
index a2f04b2..12906ba 100644 (file)
@@ -290,7 +290,6 @@ flr_done:
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(adev, NULL, &reset_context);
        }
index a977f00..e07757e 100644 (file)
@@ -317,7 +317,6 @@ flr_done:
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(adev, NULL, &reset_context);
        }
index fd14fa9..288c414 100644 (file)
@@ -529,7 +529,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(adev, NULL, &reset_context);
        }