From b98a1648d6616d288e888c6dc6dcd4fa543585b3 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 13 Oct 2022 11:06:33 +0800 Subject: Revert "drm/amdgpu: let mode2 reset fallback to default when failure" This reverts commit dac6b80818ac2353631c5a33d140d8d5508e2957. This commit reverted the AMDGPU_SKIP_MODE2_RESET as it conflicts with the original design of reset handler. Will redesign it. Fixes: dac6b80818ac23 ("drm/amdgpu: let mode2 reset fallback to default when failure") Signed-off-by: Victor Zhao Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2dad7aa9a03b..75f1402101f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } -- cgit v1.2.3-70-g09d2 From 073285efde229ae82d3b853c7f4bcca81f97a55f Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 27 Sep 2022 14:06:42 +0800 Subject: drm/amdgpu: Enable ras support for mp0 v13_0_0 and v13_0_10 V1: Enable ras support for CHIP_IP_DISCOVERY asic type. V2: 1. Change commit comment. 2. Enable ras support for mp0 v13_0_0 and v13_0_10. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 75f1402101f4..4a8f73cc4cb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2267,6 +2267,16 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) { + if (adev->asic_type == CHIP_IP_DISCOVERY) { + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + return true; + default: + return false; + } + } + return adev->asic_type == CHIP_VEGA10 || adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS || -- cgit v1.2.3-70-g09d2 From 82835055c62fa49b50dc00736743e8f99ed93638 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 28 Sep 2022 15:52:02 +0800 Subject: drm/amdgpu: Add sriov vf ras support in amdgpu_ras_asic_supported V2: Add sriov vf ras support in amdgpu_ras_asic_supported. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4a8f73cc4cb5..a4b47e1bd111 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2267,6 +2267,15 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) { + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 2): + return true; + default: + return false; + } + } + if (adev->asic_type == CHIP_IP_DISCOVERY) { switch (adev->ip_versions[MP0_HWIP][0]) { case IP_VERSION(13, 0, 0): @@ -2320,11 +2329,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) !amdgpu_ras_asic_supported(adev)) return; - /* If driver run on sriov guest side, only enable ras for aldebaran */ - if (amdgpu_sriov_vf(adev) && - adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2)) - return; - if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); -- cgit v1.2.3-70-g09d2 From 6c0ca748205dc815505c6de79ecf565953390b66 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 14 Oct 2022 15:17:43 +0800 Subject: drm/amdgpu: move convert_error_address out of umc_ras RAS error address translation algorithm is common across dGPU and A + A platform as along as the SOC integrates the same generation of UMC IP. UMC RAS is managed by x86 MCA on A + A platform, umc_ras in GPU driver is not initialized at all on A + A platform. In such case, any umc_ras callback implemented for dGPU config shouldn't be invoked from A + A specific callback. The change moves convert_error_address out of dGPU umc_ras structure and makes it share between A + A and dGPU config. Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 3 --- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 4 +++- 4 files changed, 18 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a4b47e1bd111..21a47f2bb87b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -36,6 +36,7 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "atom.h" #include "amdgpu_reset.h" +#include "umc_v6_7.h" #ifdef CONFIG_X86_MCE_AMD #include @@ -2899,10 +2900,17 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, /* * Translate UMC channel address to Physical address */ - if (adev->umc.ras && - adev->umc.ras->convert_ras_error_address) - adev->umc.ras->convert_ras_error_address(adev, - &err_data, m->addr, ch_inst, umc_inst); + switch (adev->ip_versions[UMC_HWIP][0]) { + case IP_VERSION(6, 7, 0): + umc_v6_7_convert_error_address(adev, + &err_data, m->addr, ch_inst, umc_inst); + break; + default: + dev_warn(adev->dev, + "UMC address to Physical address translation is not supported\n"); + kfree(err_data.err_addr); + return NOTIFY_DONE; + } if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index e46439274f3a..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -51,9 +51,6 @@ struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); - void (*convert_ras_error_address)(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst); void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 5d5d031c9e7d..72fd963f178b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } -static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst) +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) { uint32_t channel_index; uint64_t soc_pa, retired_page, column; @@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = { .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, - .convert_ras_error_address = umc_v6_7_convert_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index fe41ed2f5945..105245d5b6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -71,5 +71,7 @@ extern const uint32_t umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; extern const uint32_t umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; - +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst); #endif -- cgit v1.2.3-70-g09d2 From 24b822928b5139b85ee9a818a65e343b7e3bb4fe Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 17 Oct 2022 18:26:17 +0800 Subject: drm/amdgpu: use page retirement API in MCA notifier Make the code more readable. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 36 +++------------------------------ 1 file changed, 3 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 21a47f2bb87b..28463b47ce33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -36,7 +36,6 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "atom.h" #include "amdgpu_reset.h" -#include "umc_v6_7.h" #ifdef CONFIG_X86_MCE_AMD #include @@ -2849,7 +2848,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, struct amdgpu_device *adev = NULL; uint32_t gpu_id = 0; uint32_t umc_inst = 0, ch_inst = 0; - struct ras_err_data err_data = {0, 0, 0, NULL}; /* * If the error was generated in UMC_V2, which belongs to GPU UMCs, @@ -2888,38 +2886,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", umc_inst, ch_inst); - err_data.err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - if (!err_data.err_addr) { - dev_warn(adev->dev, - "Failed to alloc memory for umc error record in mca notifier!\n"); - return NOTIFY_DONE; - } - - /* - * Translate UMC channel address to Physical address - */ - switch (adev->ip_versions[UMC_HWIP][0]) { - case IP_VERSION(6, 7, 0): - umc_v6_7_convert_error_address(adev, - &err_data, m->addr, ch_inst, umc_inst); - break; - default: - dev_warn(adev->dev, - "UMC address to Physical address translation is not supported\n"); - kfree(err_data.err_addr); + if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst)) + return NOTIFY_OK; + else return NOTIFY_DONE; - } - - if (amdgpu_bad_page_threshold != 0) { - amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); - amdgpu_ras_save_bad_pages(adev); - } - - kfree(err_data.err_addr); - return NOTIFY_OK; } static struct notifier_block amdgpu_bad_page_nb = { -- cgit v1.2.3-70-g09d2 From 1ed0e176902483e67cd02530d387a7551b0e99a4 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 18 Oct 2022 10:31:09 +0800 Subject: drm/amdgpu: remove ras_error_status parameter for UMC poison handler Make the code simpler. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 13 +++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 4 +--- 4 files changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0561812aa0a4..37db39ba8718 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -753,9 +753,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) { - struct ras_err_data err_data = {0, 0, 0, NULL}; - - amdgpu_umc_poison_handler(adev, &err_data, reset); + amdgpu_umc_poison_handler(adev, reset); } bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 28463b47ce33..693bce07eb46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * { bool poison_stat = false; struct amdgpu_device *adev = obj->adev; - struct ras_err_data err_data = {0, 0, 0, NULL}; struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_poison_handler(adev, &err_data, false); + amdgpu_umc_poison_handler(adev, false); if (block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 758942150c09..f76c19fc0392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -165,25 +165,22 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - void *ras_error_status, - bool reset) +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) { int ret = AMDGPU_RAS_SUCCESS; if (!adev->gmc.xgmi.connected_to_cpu) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct ras_err_data err_data = {0, 0, 0, NULL}; struct ras_common_if head = { .block = AMDGPU_RAS_BLOCK__UMC, }; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - ret = - amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data->ue_count; - obj->err_data.ce_count += err_data->ce_count; + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; } } else if (reset) { /* MCA poison handler is only responsible for GPU reset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 659a10de29c9..a6951160f13a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -83,9 +83,7 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - void *ras_error_status, - bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -- cgit v1.2.3-70-g09d2 From 1a11a65d5395ccdcd07f19a75da82a3d74c368dd Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 8 Nov 2022 17:11:18 +0800 Subject: drm/amdgpu: Enable mode-1 reset for RAS recovery in fatal error mode The patch is enabling mode-1 reset for RAS recovery in fatal error mode. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0f5cbc70682a..3d5d5d49cfab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4593,6 +4593,10 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) if (amdgpu_gpu_recovery == 0) goto disabled; + /* Skip soft reset check in fatal error mode */ + if (!amdgpu_ras_is_poison_mode_supported(adev)) + return true; + if (!amdgpu_device_ip_check_soft_reset(adev)) { dev_info(adev->dev,"Timeout, but no hardware hang detected.\n"); return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 693bce07eb46..8fca3cc273e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1948,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; - clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + /* Perform full reset in fatal error mode */ + if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + else + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } -- cgit v1.2.3-70-g09d2 From 07615da1bf8eaa130ccfcf00f4687aec440652d8 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 20 Oct 2022 16:29:00 +0800 Subject: drm/amdgpu: enable RAS for VCN/JPEG v4.0 Set support flag for VCN/JPEG 4.0. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8fca3cc273e8..077404a9c935 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2348,7 +2348,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) || + adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); else -- cgit v1.2.3-70-g09d2