From f81c31d975b463c24506d817a48390621f057a57 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 11 Mar 2023 17:28:38 +0800 Subject: drm/amdgpu: Move vcn ras block init to ras sw_init Initialize vcn ras block only when vcn ip block supports ras features. Driver queries ras capabilities after early_init, ras block init needs to be moved to sw_int. Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 25217b05c0ea..0b1980ac4098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1162,19 +1162,28 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, return 0; } -void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev) +int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) { + int err; + struct amdgpu_vcn_ras *ras; + if (!adev->vcn.ras) - return; + return 0; - amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + ras = adev->vcn.ras; + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register vcn ras block!\n"); + return err; + } - strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); - adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; - adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; - adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + strcpy(ras->ras_block.ras_comm.name, "vcn"); + ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; + ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->vcn.ras_if = &ras->ras_block.ras_comm; - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->vcn.ras->ras_block.ras_late_init) - adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + if (!ras->ras_block.ras_late_init) + ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + + return 0; } -- cgit v1.2.3-70-g09d2 From 1aff0a5d71d23be6658f893c88c6a9791202bcb1 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Sun, 12 Mar 2023 13:51:00 -0300 Subject: drm/amdgpu/vcn: Disable indirect SRAM on Vangogh broken BIOSes The VCN firmware loading path enables the indirect SRAM mode if it's advertised as supported. We might have some cases of FW issues that prevents this mode to working properly though, ending-up in a failed probe. An example below, observed in the Steam Deck: [...] [drm] failed to load ucode VCN0_RAM(0x3A) [drm] psp gfx command LOAD_IP_FW(0x6) failed and response status is (0xFFFF0000) amdgpu 0000:04:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vcn_dec_0 test failed (-110) [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* hw_init of IP block failed -110 amdgpu 0000:04:00.0: amdgpu: amdgpu_device_ip_init failed amdgpu 0000:04:00.0: amdgpu: Fatal error during GPU init [...] Disabling the VCN block circumvents this, but it's a very invasive workaround that turns off the entire feature. So, let's add a quirk on VCN loading that checks for known problematic BIOSes on Vangogh, so we can proactively disable the indirect SRAM mode and allow the HW proper probe and VCN IP block to work fine. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2385 Fixes: 82132ecc5432 ("drm/amdgpu: enable Vangogh VCN indirect sram mode") Cc: stable@vger.kernel.org Cc: James Zhu Cc: Leo Liu Signed-off-by: Guilherme G. Piccoli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b1980ac4098..e63fcc58e8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -114,6 +115,24 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; + /* + * Some Steam Deck's BIOS versions are incompatible with the + * indirect SRAM mode, leading to amdgpu being unable to get + * properly probed (and even potentially crashing the kernel). + * Hence, check for these versions here - notice this is + * restricted to Vangogh (Deck's APU). + */ + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) { + const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION); + + if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) || + !strncmp("F7A0114", bios_ver, 7))) { + adev->vcn.indirect_sram = false; + dev_info(adev->dev, + "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); + } + } + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); -- cgit v1.2.3-70-g09d2 From ac1d8e2f074d9bffc2d368ad0720cdbb4c938fa5 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 21:45:51 -0400 Subject: drm/amdgpu: separate ras irq from vcn instance irq for UVD_POISON Separate vcn RAS poison consumption handling from the instance irq, and register dedicated ras_poison_irq src and funcs for UVD_POISON. v2: - Separate ras irq from vcn instance irq - Improve the subject and code comments v3: - Split the patch into three parts - Improve the code comments Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 27 ++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index e63fcc58e8e0..2d94f1b63bd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1181,6 +1181,31 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r, i; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + if (amdgpu_ras_is_supported(adev, ras_block->block)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0); + if (r) + goto late_fini; + } + } + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + return r; +} + int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) { int err; @@ -1202,7 +1227,7 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) adev->vcn.ras_if = &ras->ras_block.ras_comm; if (!ras->ras_block.ras_late_init) - ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c730949ece7d..f1397ef66fd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -234,6 +234,7 @@ struct amdgpu_vcn_inst { struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; atomic_t sched_score; struct amdgpu_irq_src irq; + struct amdgpu_irq_src ras_poison_irq; struct amdgpu_vcn_reg external; struct amdgpu_bo *dpg_sram_bo; struct dpg_pause_state pause_state; @@ -400,6 +401,8 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block); int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); #endif -- cgit v1.2.3-70-g09d2