drm/amdgpu: fix RAS unload driver issue in SRIOV
author Yang Wang <kevinyang.wang@amd.com>
Tue, 7 May 2024 02:28:04 +0000 (10:28 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 8 May 2024 19:17:05 +0000 (15:17 -0400)
Fix a NULL pointer issue that occurs when unloading the driver in SR-IOV mode.

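Move the SR-IOV guest early return in amdgpu_ras_late_init() below the
ACA/MCA setup so that the amdgpu_mca/aca_xxx_init() related functions are
still initialized properly on the guest side; only the
amdgpu_ras_set_aca/mca_debug_mode() calls are skipped for a VF.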

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 7b30f44..2c5ad95 100644 (file)
@@ -3605,10 +3605,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
        struct amdgpu_ras_block_object *obj;
        int r;
 
-       /* Guest side doesn't need init ras feature */
-       if (amdgpu_sriov_vf(adev))
-               return 0;
-
        amdgpu_ras_event_mgr_init(adev);
 
        if (amdgpu_aca_is_enabled(adev)) {
@@ -3619,7 +3615,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
                if (r)
                        return r;
 
-               amdgpu_ras_set_aca_debug_mode(adev, false);
+               if (!amdgpu_sriov_vf(adev))
+                       amdgpu_ras_set_aca_debug_mode(adev, false);
        } else {
                if (amdgpu_in_reset(adev))
                        r = amdgpu_mca_reset(adev);
@@ -3628,9 +3625,14 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
                if (r)
                        return r;
 
-               amdgpu_ras_set_mca_debug_mode(adev, false);
+               if (!amdgpu_sriov_vf(adev))
+                       amdgpu_ras_set_mca_debug_mode(adev, false);
        }
 
+       /* Guest side doesn't need init ras feature */
+       if (amdgpu_sriov_vf(adev))
+               return 0;
+
        list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
                obj = node->ras_obj;
                if (!obj) {