drm/amdgpu: Place NPS mode request on unload
authorLijo Lazar <lijo.lazar@amd.com>
Fri, 20 Sep 2024 07:44:40 +0000 (13:14 +0530)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 15 Oct 2024 15:16:20 +0000 (11:16 -0400)
If a user has requested NPS mode switch, place the request through PSP
during unload of the driver. For devices which are part of a hive, all
requests are placed together. If one of them fails, revert back to the
current NPS mode.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

index 68f9c17..46f7565 100644 (file)
@@ -2429,6 +2429,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
        struct amdgpu_device *adev = drm_to_adev(dev);
 
        amdgpu_xcp_dev_unplug(adev);
+       amdgpu_gmc_prepare_nps_mode_change(adev);
        drm_dev_unplug(dev);
 
        if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
index ddf716d..4b70123 100644 (file)
@@ -1333,3 +1333,50 @@ int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
 
        return psp_memory_partition(&adev->psp, nps_mode);
 }
+
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+                                                 int req_nps_mode,
+                                                 int cur_nps_mode)
+{
+       return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
+                       BIT(req_nps_mode)) &&
+               req_nps_mode != cur_nps_mode);
+}
+
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+       int req_nps_mode, cur_nps_mode, r;
+       struct amdgpu_hive_info *hive;
+
+       if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+           !adev->gmc.gmc_funcs->request_mem_partition_mode)
+               return;
+
+       cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+       hive = amdgpu_get_xgmi_hive(adev);
+       if (hive) {
+               req_nps_mode = atomic_read(&hive->requested_nps_mode);
+               if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+                                                   cur_nps_mode)) {
+                       amdgpu_put_xgmi_hive(hive);
+                       return;
+               }
+               r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+               amdgpu_put_xgmi_hive(hive);
+               goto out;
+       }
+
+       req_nps_mode = adev->gmc.requested_nps_mode;
+       if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+               return;
+
+       /* even if this fails, we should let driver unload w/o blocking */
+       r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+       if (r)
+               dev_err(adev->dev, "NPS mode change request failed\n");
+       else
+               dev_info(
+                       adev->dev,
+                       "NPS mode change request done, reload driver to complete the change\n");
+}
index 05c7073..b92f41d 100644 (file)
@@ -466,4 +466,6 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
 
 int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
                                        int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+
 #endif
index d537896..6350371 100644 (file)
@@ -1564,3 +1564,41 @@ int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)
 
        return 0;
 }
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+                                  struct amdgpu_hive_info *hive,
+                                  int req_nps_mode)
+{
+       struct amdgpu_device *tmp_adev;
+       int cur_nps_mode, r;
+
+       /* This is expected to be called only during unload of driver. The
+        * request needs to be placed only once for all devices in the hive. If
+        * one of them fail, revert the request for previous successful devices.
+        * After placing the request, make hive mode as UNKNOWN so that other
+        * devices don't request anymore.
+        */
+       mutex_lock(&hive->hive_lock);
+       list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+               r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+                       tmp_adev, req_nps_mode);
+               if (r)
+                       goto err;
+       }
+       /* Set to UNKNOWN so that other devices don't request anymore */
+       atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+
+       mutex_unlock(&hive->hive_lock);
+
+       return 0;
+err:
+       /* Request back current mode if one of the requests failed */
+       cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+       list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list,
+                                            gmc.xgmi.head)
+               adev->gmc.gmc_funcs->request_mem_partition_mode(tmp_adev,
+                                                               cur_nps_mode);
+       mutex_lock(&hive->hive_lock);
+
+       return r;
+}
index 67abadb..41d5f97 100644 (file)
@@ -79,4 +79,8 @@ static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
 int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
 
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+                                  struct amdgpu_hive_info *hive,
+                                  int req_nps_mode);
+
 #endif