diff options
author | YiPeng Chai <YiPeng.Chai@amd.com> | 2024-01-15 11:02:52 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-01-22 17:13:25 -0500 |
commit | 0795b5d234902269fc5182dd8e46f21bdafbcd13 (patch) | |
tree | a38ca06430a55eea7a07c8fa5752649192e085bb /drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | |
parent | afb617f38f221e88dc5b3f3fc2d87cc749175609 (diff) |
drm/amdgpu:Support retiring multiple MCA error address pages
Support retiring multiple MCA error address pages in
one in-band query for umc v12_0.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v12_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 66 |
1 files changed, 35 insertions, 31 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 5ca73fefe358..836a4cc1134e 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -382,42 +382,46 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade { struct ras_err_node *err_node; uint64_t mc_umc_status; + struct ras_err_info *err_info; + struct ras_err_addr *mca_err_addr, *tmp; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; for_each_ras_error(err_node, err_data) { - mc_umc_status = err_node->err_info.err_addr.err_status; - if (!mc_umc_status) + err_info = &err_node->err_info; + if (list_empty(&err_info->err_addr_list)) continue; - if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || - umc_v12_0_is_deferred_error(adev, mc_umc_status)) { - uint64_t mca_addr, err_addr, mca_ipid; - uint32_t InstanceIdLo; - struct amdgpu_smuio_mcm_config_info *mcm_info; - - mcm_info = &err_node->err_info.mcm_info; - mca_addr = err_node->err_info.err_addr.err_addr; - mca_ipid = err_node->err_info.err_addr.err_ipid; - - err_addr = REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); - - dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", - mca_ipid, - mcm_info->die_id, - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - err_addr); - - umc_v12_0_convert_error_address(adev, - err_data, err_addr, - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - mcm_info->die_id); - - /* Clear umc error address content */ - memset(&err_node->err_info.err_addr, - 0, sizeof(err_node->err_info.err_addr)); + list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) { + mc_umc_status = mca_err_addr->err_status; + if (mc_umc_status && + (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status))) { + uint64_t mca_addr, err_addr, mca_ipid; + uint32_t InstanceIdLo; + + mca_addr = mca_err_addr->err_addr; + mca_ipid = mca_err_addr->err_ipid; + + err_addr = REG_GET_FIELD(mca_addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", + mca_ipid, + err_info->mcm_info.die_id, + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + err_addr); + + umc_v12_0_convert_error_address(adev, + err_data, err_addr, + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + err_info->mcm_info.die_id); + } + + /* Delete error address node from list and free memory */ + amdgpu_ras_del_mca_err_addr(err_info, mca_err_addr); } } } |