drm/amd/pm: Add gpu_metrics_v1_7
author Asad Kamal <asad.kamal@amd.com>
Mon, 11 Nov 2024 12:11:48 +0000 (20:11 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 20 Nov 2024 14:36:37 +0000 (09:36 -0500)
Add new gpu_metrics_v1_7 to acquire xgmi link status,
application counter and max vram bandwidth

v2: Use gpu_metrics_v1_7 for SMU_v_13_0_6 (Lijo)

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/include/kgd_pp_interface.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c

index 640f6dc..67a5de5 100644 (file)
@@ -364,6 +364,17 @@ struct amdgpu_xcp_metrics {
        uint64_t gfx_busy_acc[MAX_XCC];
 };
 
+struct amdgpu_xcp_metrics_v1_1 {
+       /* Utilization Instantaneous (%); arrays sized by MAX_XCC, NUM_JPEG_ENG and NUM_VCN */
+       uint32_t gfx_busy_inst[MAX_XCC];
+       uint16_t jpeg_busy[NUM_JPEG_ENG];
+       uint16_t vcn_busy[NUM_VCN];
+       /* Utilization Accumulated (%); array sized by MAX_XCC */
+       uint64_t gfx_busy_acc[MAX_XCC];
+       /* Total App Clock Counter Accumulated; array sized by MAX_XCC */
+       uint64_t gfx_below_host_limit_acc[MAX_XCC];
+};
+
 struct amd_pm_funcs {
 /* export for dpm on ci and si */
        int (*pre_set_power_state)(void *handle);
@@ -977,6 +988,105 @@ struct gpu_metrics_v1_6 {
        uint32_t                        pcie_lc_perf_other_end_recovery;
 };
 
+struct gpu_metrics_v1_7 {
+       struct metrics_table_header     common_header;
+
+       /* Temperature (Celsius) */
+       uint16_t                        temperature_hotspot;
+       uint16_t                        temperature_mem;
+       uint16_t                        temperature_vrsoc;
+
+       /* Power (Watts) */
+       uint16_t                        curr_socket_power;
+
+       /* Utilization (%) */
+       uint16_t                        average_gfx_activity;
+       uint16_t                        average_umc_activity; // memory controller
+
+       /* VRAM max bandwidth (in GB/sec) at max memory clock */
+       uint64_t                        mem_max_bandwidth;
+
+       /* Energy (15.259uJ (2^-16) units) */
+       uint64_t                        energy_accumulator;
+
+       /* Driver attached timestamp (in ns) */
+       uint64_t                        system_clock_counter;
+
+       /* Accumulation cycle counter */
+       uint32_t                        accumulation_counter;
+
+       /* Accumulated throttler residencies */
+       uint32_t                        prochot_residency_acc;
+       uint32_t                        ppt_residency_acc;
+       uint32_t                        socket_thm_residency_acc;
+       uint32_t                        vr_thm_residency_acc;
+       uint32_t                        hbm_thm_residency_acc;
+
+       /* Clock Lock Status. Each bit corresponds to clock instance */
+       uint32_t                        gfxclk_lock_status;
+
+       /* Link width (number of lanes) and speed (in 0.1 GT/s) */
+       uint16_t                        pcie_link_width;
+       uint16_t                        pcie_link_speed;
+
+       /* XGMI bus width and bitrate (in Gbps) */
+       uint16_t                        xgmi_link_width;
+       uint16_t                        xgmi_link_speed;
+
+       /* Utilization Accumulated (%) */
+       uint32_t                        gfx_activity_acc;
+       uint32_t                        mem_activity_acc;
+
+       /* PCIE accumulated bandwidth (GB/sec) */
+       uint64_t                        pcie_bandwidth_acc;
+
+       /* PCIE instantaneous bandwidth (GB/sec) */
+       uint64_t                        pcie_bandwidth_inst;
+
+       /* PCIE L0 to recovery state transition accumulated count */
+       uint64_t                        pcie_l0_to_recov_count_acc;
+
+       /* PCIE replay accumulated count */
+       uint64_t                        pcie_replay_count_acc;
+
+       /* PCIE replay rollover accumulated count */
+       uint64_t                        pcie_replay_rover_count_acc;
+
+       /* PCIE NAK sent accumulated count */
+       uint32_t                        pcie_nak_sent_count_acc;
+
+       /* PCIE NAK received accumulated count */
+       uint32_t                        pcie_nak_rcvd_count_acc;
+
+       /* XGMI accumulated data transfer size (KiloBytes), one entry per link */
+       uint64_t                        xgmi_read_data_acc[NUM_XGMI_LINKS];
+       uint64_t                        xgmi_write_data_acc[NUM_XGMI_LINKS];
+
+       /* XGMI link status (active/inactive), one entry per link */
+       uint16_t                        xgmi_link_status[NUM_XGMI_LINKS];
+
+       uint16_t                        padding;
+
+       /* PMFW attached timestamp (10ns resolution) */
+       uint64_t                        firmware_timestamp;
+
+       /* Current clocks (MHz) */
+       uint16_t                        current_gfxclk[MAX_GFX_CLKS];
+       uint16_t                        current_socclk[MAX_CLKS];
+       uint16_t                        current_vclk0[MAX_CLKS];
+       uint16_t                        current_dclk0[MAX_CLKS];
+       uint16_t                        current_uclk;
+
+       /* Number of current partitions */
+       uint16_t                        num_partition;
+
+       /* XCP metrics stats, NUM_XCP entries */
+       struct amdgpu_xcp_metrics_v1_1  xcp_stats[NUM_XCP];
+
+       /* PCIE other end recovery counter */
+       uint32_t                        pcie_lc_perf_other_end_recovery;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
index fa30a9e..11ecaa6 100644 (file)
@@ -370,7 +370,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
                return -ENOMEM;
        smu_table->metrics_time = 0;
 
-       smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_6);
+       smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_7);
        smu_table->gpu_metrics_table =
                kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
        if (!smu_table->gpu_metrics_table) {
@@ -2321,8 +2321,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 {
        bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst;
        struct smu_table_context *smu_table = &smu->smu_table;
-       struct gpu_metrics_v1_6 *gpu_metrics =
-               (struct gpu_metrics_v1_6 *)smu_table->gpu_metrics_table;
+       struct gpu_metrics_v1_7 *gpu_metrics =
+               (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table;
        bool flag = smu_v13_0_6_is_unified_metrics(smu);
        int ret = 0, xcc_id, inst, i, j, k, idx;
        struct amdgpu_device *adev = smu->adev;
@@ -2341,7 +2341,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 
        metrics_a = (MetricsTableA_t *)metrics_x;
 
-       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 6);
+       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7);
 
        gpu_metrics->temperature_hotspot =
                SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag));
index f1ab1a6..dbbd375 100644 (file)
@@ -1081,6 +1081,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
        case METRICS_VERSION(1, 6):
                structure_size = sizeof(struct gpu_metrics_v1_6);
                break;
+       case METRICS_VERSION(1, 7):
+               structure_size = sizeof(struct gpu_metrics_v1_7);
+               break;
        case METRICS_VERSION(2, 0):
                structure_size = sizeof(struct gpu_metrics_v2_0);
                break;