summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
authorCandice Li <candice.li@amd.com>2024-01-10 15:30:31 +0800
committerAlex Deucher <alexander.deucher@amd.com>2024-01-15 18:35:37 -0500
commit9c97bf88f4a71c1547e6d4a347597fa77f63abbb (patch)
treeb3072781f844a0d9a05e7ea1d0f3a6be6cb8696f /drivers/gpu/drm
parentbbcbfd4363e9447088e98932c41f417f3bb08050 (diff)
drm/amdgpu: Do bad page retirement for deferred errors
Needs to do bad page retirement for deferred errors. v2: Drop unused dev_info. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Signed-off-by: Candice Li <candice.li@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c10
1 files changed, 4 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index d65e21914d8c..3932ac81a67c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -93,6 +93,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int ret = 0;
+ unsigned long err_count;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
@@ -147,16 +148,13 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
}
/* only uncorrectable error needs gpu reset */
- if (err_data->ue_count) {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in UMC block\n",
- err_data->ue_count);
-
+ if (err_data->ue_count || err_data->de_count) {
+ err_count = err_data->ue_count + err_data->de_count;
if ((amdgpu_bad_page_threshold != 0) &&
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
+ amdgpu_ras_save_bad_pages(adev, &err_count);
amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);