drm/amdgpu: fix possible pstate switch race condition
author: Evan Quan <evan.quan@amd.com>
Thu, 31 Oct 2019 06:15:29 +0000 (14:15 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 6 Nov 2019 21:27:48 +0000 (16:27 -0500)
Add lock protection so that p-state switches are
serialized. Also, update the hive pstate only when
all devices in the hive are in the same state.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

index 0bb08e0..bcc5d40 100644 (file)
@@ -977,6 +977,9 @@ struct amdgpu_device {
 
        uint64_t                        unique_id;
        uint64_t        df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+       /* device pstate */
+       int                             pstate;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
index 167d9fb..de20a9a 100644 (file)
@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 {
        int ret = 0;
        struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+       struct amdgpu_device *tmp_adev;
+       bool update_hive_pstate = true;
 
        if (!hive)
                return 0;
 
-       if (hive->pstate == pstate)
+       mutex_lock(&hive->hive_lock);
+
+       if (hive->pstate == pstate) {
+               mutex_unlock(&hive->hive_lock);
                return 0;
+       }
 
        dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
 
@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
                ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
                                                                pstate);
 
-       if (ret)
+       if (ret) {
                dev_err(adev->dev,
                        "XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
                        adev->gmc.xgmi.node_id,
                        adev->gmc.xgmi.hive_id, ret);
+               goto out;
+       }
+
+       /* Update device pstate */
+       adev->pstate = pstate;
+
+       /*
+        * Update the hive pstate only all devices of the hive
+        * are in the same pstate
+        */
+       list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+               if (tmp_adev->pstate != adev->pstate) {
+                       update_hive_pstate = false;
+                       break;
+               }
+       }
+       if (update_hive_pstate)
+               hive->pstate = pstate;
+
+out:
+       mutex_unlock(&hive->hive_lock);
 
        return ret;
 }
@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                goto exit;
        }
 
+       /* Set default device pstate */
+       adev->pstate = -1;
+
        top_info = &adev->psp.xgmi_context.top_info;
 
        list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);