drm/amd/powerplay: add new sysfs interface for retrieving gpu metrics(V2)
author: Evan Quan <evan.quan@amd.com>
Thu, 23 Jul 2020 10:03:35 +0000 (18:03 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 6 Aug 2020 19:43:56 +0000 (15:43 -0400)
A new interface for UMD to retrieve gpu metrics data.

V2: enrich the documentation

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Documentation/gpu/amdgpu.rst
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/include/kgd_pp_interface.h
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h

index 1711235..0f7679a 100644 (file)
@@ -206,6 +206,12 @@ pp_power_profile_mode
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
    :doc: mem_busy_percent
 
+gpu_metrics
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+   :doc: gpu_metrics
+
 GPU Product Information
 =======================
 
index aa27fe6..b190c0a 100644 (file)
@@ -369,6 +369,9 @@ enum amdgpu_pcie_gen {
                ((adev)->powerplay.pp_funcs->set_ppfeature_status(\
                        (adev)->powerplay.pp_handle, (ppfeatures)))
 
+#define amdgpu_dpm_get_gpu_metrics(adev, table) \
+               ((adev)->powerplay.pp_funcs->get_gpu_metrics((adev)->powerplay.pp_handle, (table))) /* hook is called unchecked; caller tests it for NULL */
+
 struct amdgpu_dpm {
        struct amdgpu_ps        *ps;
        /* number of valid power states */
index 576e3ac..1705e32 100644 (file)
@@ -2120,6 +2120,59 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
        return count;
 }
 
+/**
+ * DOC: gpu_metrics
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current gpu
+ * metrics data. The file gpu_metrics is used for this. Reading the
+ * file will dump all the current gpu metrics data.
+ *
+ * These data include temperature, frequency, engine utilization,
+ * power consumption, throttler status, fan speed and cpu core statistics
+ * (available for APU only). That is, it will give a snapshot of all
+ * sensors at the same time.
+ */
+static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
+                                     struct device_attribute *attr,
+                                     char *buf)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = ddev->dev_private;
+       void *gpu_metrics;      /* only read below when size > 0 */
+       ssize_t size = 0;       /* stays 0 when neither backend branch runs */
+       int ret;
+
+       if (amdgpu_in_reset(adev))
+               return -EPERM;
+
+       ret = pm_runtime_get_sync(ddev->dev);
+       if (ret < 0) {
+               pm_runtime_put_autosuspend(ddev->dev);  /* balance the get_sync above */
+               return ret;
+       }
+
+       down_read(&adev->reset_sem);
+       if (is_support_sw_smu(adev))
+               size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics);
+       else if (adev->powerplay.pp_funcs->get_gpu_metrics)     /* hook is optional */
+               size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
+       up_read(&adev->reset_sem);
+
+       if (size <= 0)
+               goto out;       /* nothing to copy; 0 or the negative error is returned */
+
+       if (size >= PAGE_SIZE)
+               size = PAGE_SIZE - 1;   /* clamp the copy to less than one page */
+
+       memcpy(buf, gpu_metrics, size); /* raw byte copy, no text formatting */
+
+out:
+       pm_runtime_mark_last_busy(ddev->dev);
+       pm_runtime_put_autosuspend(ddev->dev);
+
+       return size;
+}
+
 static struct amdgpu_device_attr amdgpu_device_attrs[] = {
        AMDGPU_DEVICE_ATTR_RW(power_dpm_state,                          ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level,        ATTR_FLAG_BASIC),
@@ -2143,6 +2196,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
        AMDGPU_DEVICE_ATTR_RW(pp_features,                              ATTR_FLAG_BASIC),
        AMDGPU_DEVICE_ATTR_RO(unique_id,                                ATTR_FLAG_BASIC),
        AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging,               ATTR_FLAG_BASIC),
+       AMDGPU_DEVICE_ATTR_RO(gpu_metrics,                              ATTR_FLAG_BASIC),
 };
 
 static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
@@ -2192,6 +2246,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
        } else if (DEVICE_ATTR_IS(pp_features)) {
                if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10)
                        *states = ATTR_STATE_UNSUPPORTED;
+       } else if (DEVICE_ATTR_IS(gpu_metrics)) {
+               if (asic_type < CHIP_VEGA12)
+                       *states = ATTR_STATE_UNSUPPORTED;
        }
 
        if (asic_type == CHIP_ARCTURUS) {
index 5f38ee6..0aec28f 100644 (file)
@@ -322,6 +322,7 @@ struct amd_pm_funcs {
        int (*asic_reset_mode_2)(void *handle);
        int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
        int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
+       ssize_t (*get_gpu_metrics)(void *handle, void **table);
 };
 
 struct metrics_table_header {
index d03b485..f3f50b5 100644 (file)
@@ -2516,3 +2516,23 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
 
        return ret;
 }
+
+ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu,
+                               void **table)   /* forwards to ppt_funcs->get_gpu_metrics under smu->mutex */
+{
+       ssize_t size;
+
+       if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+               return -EOPNOTSUPP;     /* either pm_enabled or dpm_enabled is false */
+
+       if (!smu->ppt_funcs->get_gpu_metrics)
+               return -EOPNOTSUPP;     /* no get_gpu_metrics hook installed */
+
+       mutex_lock(&smu->mutex);
+
+       size = smu->ppt_funcs->get_gpu_metrics(smu, table);     /* hook runs with smu->mutex held */
+
+       mutex_unlock(&smu->mutex);
+
+       return size;    /* hook's return value, passed through unchanged */
+}
index b57b104..a08155b 100644 (file)
@@ -589,6 +589,7 @@ struct pptable_funcs {
        void (*log_thermal_throttling_event)(struct smu_context *smu);
        size_t (*get_pp_feature_mask)(struct smu_context *smu, char *buf);
        int (*set_pp_feature_mask)(struct smu_context *smu, uint64_t new_mask);
+       ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table);
 };
 
 typedef enum {
@@ -791,5 +792,7 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
 
 int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value);
 
+ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu, void **table);
+
 #endif
 #endif