drm/amdgpu: add smu v13.0.6 pcs xgmi ras error query support
author: Yang Wang <kevinyang.wang@amd.com>
Tue, 7 Nov 2023 10:03:45 +0000 (18:03 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 9 Nov 2023 22:02:59 +0000 (17:02 -0500)
Add PCS XGMI RAS error query support for SMU v13.0.6.

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

index e4f30d2..2b488fc 100644 (file)
@@ -54,6 +54,7 @@ enum amdgpu_mca_ip {
        AMDGPU_MCA_IP_SMU,
        AMDGPU_MCA_IP_MP5,
        AMDGPU_MCA_IP_UMC,
+       AMDGPU_MCA_IP_PCS_XGMI,
        AMDGPU_MCA_IP_COUNT,
 };
 
index 45483bc..891605d 100644 (file)
@@ -2379,6 +2379,7 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT
        MCA_BANK_IPID(UMC, 0x96, 0x0),
        MCA_BANK_IPID(SMU, 0x01, 0x1),
        MCA_BANK_IPID(MP5, 0x01, 0x2),
+       MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0),
 };
 
 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
@@ -2482,6 +2483,22 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
        return 0;
 }
 
+/*
+ * Report the PCS XGMI error count carried by a single MCA bank entry.
+ *
+ * The extended error code is extracted from the entry's STATUS register.
+ * The entry counts as one error when the requested type is UE and the
+ * extended error code is 0, or when the requested type is CE and the
+ * extended error code is 6. Any other combination counts as zero errors.
+ *
+ * *count is written on every path, so a caller that passes an
+ * uninitialized variable never reads an indeterminate value after this
+ * function returns. mca_ras and adev are not used here.
+ *
+ * Always returns 0.
+ */
+static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+                                         enum amdgpu_mca_error_type type, struct mca_bank_entry *entry,
+                                         uint32_t *count)
+{
+       u32 ext_error_code;
+
+       ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]);
+
+       /* Default to "no error" so the output is defined when nothing matches. */
+       *count = 0;
+
+       if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0)
+               *count = 1;
+       else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6)
+               *count = 1;
+
+       return 0;
+}
+
 static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
                                     uint32_t errcode)
 {
@@ -2609,6 +2626,10 @@ static const struct mca_ras_info mca_ras_table[] = {
                .err_code_count = ARRAY_SIZE(mmhub_err_codes),
                .get_err_count = mca_smu_mca_get_err_count,
                .bank_is_valid = mca_smu_bank_is_valid,
+       }, {
+               .blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL,
+               .ip = AMDGPU_MCA_IP_PCS_XGMI,
+               .get_err_count = mca_pcs_xgmi_mca_get_err_count,
        },
 };