From 7e882aee845fa11da043a2e7f872551763b3a9dc Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 21 Apr 2021 15:01:54 +0800 Subject: drm/amdgpu: add support for ras init flags conditionally configure ras for dgpu mode or poison propogation mode Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a09483beb968..9311dcc94cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1037,6 +1037,13 @@ static int psp_ras_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + + if (psp->adev->gmc.xgmi.connected_to_cpu) + ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; + else + ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; + psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->ta_ras_ucode_size, @@ -1046,8 +1053,6 @@ static int psp_ras_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - if (!ret) { psp->ras.session_id = cmd->resp.session_id; -- cgit v1.2.3-70-g09d2 From 041e69160d161f5b4ad77cd915640c5cfe9c9bc3 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 22 Apr 2021 17:38:09 +0800 Subject: drm/amdgpu/sriov: Remove clear vf fw support PSP clear_vf_fw feature is outdated and has been removed. Remove the related functions. Signed-off-by: Victor Zhao Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 32 -------------------------------- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 1 - 2 files changed, 33 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9311dcc94cb6..623044414bb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -417,26 +417,6 @@ static int psp_tmr_init(struct psp_context *psp) return ret; } -static int psp_clear_vf_fw(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW; - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); - - return ret; -} - static bool psp_skip_tmr(struct psp_context *psp) { switch (psp->adev->asic_type) { @@ -1925,12 +1905,6 @@ static int psp_hw_start(struct psp_context *psp) return ret; } - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } - ret = psp_boot_config_set(adev); if (ret) { DRM_WARN("PSP set boot config@\n"); @@ -2439,7 +2413,6 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - int ret; if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); @@ -2450,11 +2423,6 @@ static int psp_hw_fini(void *handle) } psp_asd_unload(psp); - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 96064c343163..f6d3180febc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -97,7 +97,6 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ - GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */ GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ -- cgit v1.2.3-70-g09d2 From e7de0d844ea803d752e0b41e6b7dcc35d671c186 Mon Sep 17 00:00:00 2001 From: Zhigang Luo Date: Thu, 29 Apr 2021 15:37:31 -0400 Subject: drm/amdgpu: Add Aldebaran virtualization support 1. add Aldebaran in virtualization detection list. 2. disable Aldebaran virtual display support as there is no GFX engine in Aldebaran. 3. skip TMR loading if Aldebaran is in virtualizatin mode as it shares the one host loaded. Signed-off-by: Zhigang Luo Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 623044414bb5..17b728d2c1f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -422,6 +422,7 @@ static bool psp_skip_tmr(struct psp_context *psp) switch (psp->adev->asic_type) { case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_ALDEBARAN: return true; default: return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0c9c5255aa42..a57842689d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) struct drm_device *ddev = adev_to_drm(adev); /* enable virtual display */ - if (adev->mode_info.num_crtc == 0) - adev->mode_info.num_crtc = 1; - adev->enable_virtual_display = true; + if (adev->asic_type != CHIP_ALDEBARAN && + adev->asic_type != CHIP_ARCTURUS) { + if (adev->mode_info.num_crtc == 0) + adev->mode_info.num_crtc = 1; + adev->enable_virtual_display = true; + } ddev->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; @@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA20: case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: soc15_set_virt_ops(adev); break; case CHIP_NAVI10: -- cgit v1.2.3-70-g09d2 From acdae2169bae6993c61ded36ae0b2301c07c0a9e Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Mon, 3 May 2021 20:02:22 -0400 Subject: drm/amdgpu: Remove redundant ras->supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove redundant ras->supported, as this value is also stored in adev->ras_features. Use adev->ras_features, as that supercedes "ras", since the latter is its member. The dependency goes like this: ras <== adev->ras_features <== hw_supported, and is read as "ras depends on ras_features, which depends on hw_supported." The arrows show the flow of information, i.e. the dependency update. "hw_supported" should also live in "adev". Cc: Alexander Deucher Cc: John Clements Cc: Hawking Zhang Signed-off-by: Luben Tuikov Acked-by: Christian König Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 +--- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++++--- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 3 ++- 8 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1b60f8205f15..0ed11428a57d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5108,7 +5108,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (!amdgpu_device_supports_baco(adev_to_drm(adev))) return -ENOTSUPP; - if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) + if (ras && adev->ras_features && + adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); return amdgpu_dpm_baco_enter(adev); @@ -5127,7 +5128,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) + if (ras && adev->ras_features && + adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 07e8a7c28561..7a6a87e5c79e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!ras) return -EINVAL; - ras_mask = (uint64_t)ras->supported << 32 | ras->features; + ras_mask = (uint64_t)adev->ras_features << 32 | ras->features; return copy_to_user(out, &ras_mask, min_t(u64, size, sizeof(ras_mask))) ? diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 17b728d2c1f2..4885b718cb6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2146,7 +2146,7 @@ static int psp_load_smu_fw(struct psp_context *psp) return 0; if ((amdgpu_in_reset(adev) && - ras && ras->supported && + ras && adev->ras_features && (adev->asic_type == CHIP_ARCTURUS || adev->asic_type == CHIP_VEGA20)) || (adev->in_runpm && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ebbe2c5190c4..a484ac6a8399 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2130,9 +2130,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, /* hw_supported needs to be aligned with RAS block mask. */ *hw_supported &= AMDGPU_RAS_BLOCK_MASK; - *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; - adev->ras_features = *supported; + *supported = amdgpu_ras_enable == 0 ? 0 : + *hw_supported & amdgpu_ras_mask; } int amdgpu_ras_init(struct amdgpu_device *adev) @@ -2154,7 +2153,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_set_context(adev, con); amdgpu_ras_check_supported(adev, &con->hw_supported, - &con->supported); + &adev->ras_features); if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) { /* set gfx block ras context feature for VEGA20 Gaming * send ras disable cmd to ras ta during ras late init. @@ -2210,7 +2209,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) dev_info(adev->dev, "RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", - con->hw_supported, con->supported); + con->hw_supported, adev->ras_features); return 0; release_con: amdgpu_ras_set_context(adev, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 60df268a0c66..3e830dc1a33d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -314,8 +314,6 @@ struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ uint32_t hw_supported; - /* for IP to check its ras ability. */ - uint32_t supported; uint32_t features; struct list_head head; /* sysfs */ @@ -478,7 +476,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, if (block >= AMDGPU_RAS_BLOCK_COUNT) return 0; - return ras && (ras->supported & (1 << block)); + return ras && (adev->ras_features & (1 << block)); } int amdgpu_ras_recovery_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e3f42ad1e6bc..301695f3f237 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ - if (ras && ras->supported) + if (ras && adev->ras_features) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); ret = amdgpu_dpm_baco_reset(adev); @@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return ret; /* re-enable doorbell interrupt after BACO exit */ - if (ras && ras->supported) + if (ras && adev->ras_features) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; @@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * 1. PMFW version > 0x284300: all cases use baco * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco */ - if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400) + if (ras && adev->ras_features && + adev->pm.fw_version <= 0x283400) baco_reset = false; break; case CHIP_ALDEBARAN: diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c index 2a28c9df15a0..f6b1efce450c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c @@ -85,7 +85,7 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) return 0; if (state == BACO_STATE_IN) { - if (!ras || !ras->supported) { + if (!ras || !adev->ras_features) { data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); data |= 0x80000000; WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 6274cae4a065..72581e43d83e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1531,7 +1531,8 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) NULL); break; default: - if (!ras || !ras->supported || adev->gmc.xgmi.pending_reset) { + if (!ras || !adev->ras_features || + adev->gmc.xgmi.pending_reset) { if (adev->asic_type == CHIP_ARCTURUS) { data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT); data |= 0x80000000; -- cgit v1.2.3-70-g09d2 From 8ab0d6f030bab4d8c7310e9894f6fdb59817d241 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Tue, 4 May 2021 02:25:29 -0400 Subject: drm/amdgpu: Rename to ras_*_enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename, ras_hw_supported --> ras_hw_enabled, and ras_features --> ras_enabled, to show that ras_enabled is a subset of ras_hw_enabled, which itself is a subset of the ASIC capability. Cc: Alexander Deucher Cc: John Clements Cc: Hawking Zhang Signed-off-by: Luben Tuikov Acked-by: Christian König Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 50 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 6 +-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 +-- .../gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 +- 11 files changed, 41 insertions(+), 41 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ba740544eb77..cef7bbe850e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1073,8 +1073,8 @@ struct amdgpu_device { atomic_t throttling_logging_enabled; struct ratelimit_state throttling_logging_rs; - uint32_t ras_hw_supported; - uint32_t ras_features; + uint32_t ras_hw_enabled; + uint32_t ras_enabled; bool in_pci_err_recovery; struct pci_saved_state *pci_state; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0ed11428a57d..b0543f409039 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5108,7 +5108,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (!amdgpu_device_supports_baco(adev_to_drm(adev))) return -ENOTSUPP; - if (ras && adev->ras_features && + if (ras && adev->ras_enabled && adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); @@ -5128,7 +5128,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - if (ras && adev->ras_features && + if (ras && adev->ras_enabled && adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 7a6a87e5c79e..8d12e474745a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!ras) return -EINVAL; - ras_mask = (uint64_t)adev->ras_features << 32 | ras->features; + ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features; return copy_to_user(out, &ras_mask, min_t(u64, size, sizeof(ras_mask))) ? diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4885b718cb6c..3179ca9fc03f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2146,7 +2146,7 @@ static int psp_load_smu_fw(struct psp_context *psp) return 0; if ((amdgpu_in_reset(adev) && - ras && adev->ras_features && + ras && adev->ras_enabled && (adev->asic_type == CHIP_ARCTURUS || adev->asic_type == CHIP_VEGA20)) || (adev->in_runpm && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e7940e813f32..444f5325f092 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -532,7 +532,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return NULL; if (head->block >= AMDGPU_RAS_BLOCK_COUNT) @@ -559,7 +559,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, struct ras_manager *obj; int i; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return NULL; if (head) { @@ -613,7 +613,7 @@ static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, struct ras_common_if *head) { - return adev->ras_hw_supported & BIT(head->block); + return adev->ras_hw_enabled & BIT(head->block); } static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, @@ -767,7 +767,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, ret = amdgpu_ras_feature_enable(adev, head, 0); /* clean gfx block ras features flag */ - if (adev->ras_features && head->block == AMDGPU_RAS_BLOCK__GFX) + if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX) con->features &= ~BIT(head->block); } } else @@ -1072,7 +1072,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, struct ras_manager *obj; struct ras_err_data data = {0, 0}; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; list_for_each_entry(obj, &con->head, node) { @@ -1595,7 +1595,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; list_for_each_entry(obj, &con->head, node) { @@ -1645,7 +1645,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; list_for_each_entry(obj, &con->head, node) { @@ -1959,7 +1959,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) bool exc_err_limit = false; int ret; - if (adev->ras_features && con) + if (adev->ras_enabled && con) data = &con->eh_data; else return 0; @@ -2076,7 +2076,7 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) if (strnstr(ctx->vbios_version, "D16406", sizeof(ctx->vbios_version))) - adev->ras_hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); } /* @@ -2090,7 +2090,7 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) */ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { - adev->ras_hw_supported = adev->ras_features = 0; + adev->ras_hw_enabled = adev->ras_enabled = 0; if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || !amdgpu_ras_asic_supported(adev)) @@ -2099,7 +2099,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); - adev->ras_hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC | + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); } else { dev_info(adev->dev, "MEM ECC is not presented.\n"); @@ -2107,7 +2107,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { dev_info(adev->dev, "SRAM ECC is active.\n"); - adev->ras_hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); @@ -2115,7 +2115,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ - adev->ras_hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX | + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX | 1 << AMDGPU_RAS_BLOCK__SDMA | 1 << AMDGPU_RAS_BLOCK__MMHUB); } @@ -2123,10 +2123,10 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) amdgpu_ras_get_quirks(adev); /* hw_supported needs to be aligned with RAS block mask. */ - adev->ras_hw_supported &= AMDGPU_RAS_BLOCK_MASK; + adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; - adev->ras_features = amdgpu_ras_enable == 0 ? 0 : - adev->ras_hw_supported & amdgpu_ras_mask; + adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : + adev->ras_hw_enabled & amdgpu_ras_mask; } int amdgpu_ras_init(struct amdgpu_device *adev) @@ -2149,11 +2149,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_check_supported(adev); - if (!adev->ras_hw_supported || adev->asic_type == CHIP_VEGA10) { + if (!adev->ras_hw_enabled || adev->asic_type == CHIP_VEGA10) { /* set gfx block ras context feature for VEGA20 Gaming * send ras disable cmd to ras ta during ras late init. */ - if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) { + if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) { con->features |= BIT(AMDGPU_RAS_BLOCK__GFX); return 0; @@ -2204,7 +2204,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) dev_info(adev->dev, "RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", - adev->ras_hw_supported, adev->ras_features); + adev->ras_hw_enabled, adev->ras_enabled); return 0; release_con: @@ -2319,7 +2319,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; - if (!adev->ras_features || !con) { + if (!adev->ras_enabled || !con) { /* clean ras context for VEGA20 Gaming after send ras disable cmd */ amdgpu_release_ras_context(adev); @@ -2365,7 +2365,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; amdgpu_ras_disable_all_features(adev, 0); @@ -2379,7 +2379,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; /* Need disable ras on all IPs here before ip [hw/sw]fini */ @@ -2392,7 +2392,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; amdgpu_ras_fs_fini(adev); @@ -2412,7 +2412,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { amdgpu_ras_check_supported(adev); - if (!adev->ras_hw_supported) + if (!adev->ras_hw_enabled) return; if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { @@ -2441,7 +2441,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev) if (!con) return; - if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) { + if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) { con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX); amdgpu_ras_set_context(adev, NULL); kfree(con); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f60d1cfafa3e..201fbdee1d09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -475,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, if (block >= AMDGPU_RAS_BLOCK_COUNT) return 0; - return ras && (adev->ras_features & (1 << block)); + return ras && (adev->ras_enabled & (1 << block)); } int amdgpu_ras_recovery_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6028b55ea52c..093ab98c31bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1262,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle) * writes, while disables HBM ECC for vega10. */ if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) { - if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) { + if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) { if (adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 301695f3f237..49ece2a7f9f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ - if (ras && adev->ras_features) + if (ras && adev->ras_enabled) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); ret = amdgpu_dpm_baco_reset(adev); @@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return ret; /* re-enable doorbell interrupt after BACO exit */ - if (ras && adev->ras_features) + if (ras && adev->ras_enabled) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; @@ -710,7 +710,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * 1. PMFW version > 0x284300: all cases use baco * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco */ - if (ras && adev->ras_features && + if (ras && adev->ras_enabled && adev->pm.fw_version <= 0x283400) baco_reset = false; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index fb4f718a1148..7fae6a7e51f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1430,13 +1430,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) adev = (struct amdgpu_device *)(dev->gpu->kgd); /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ dev->node_props.capability |= - ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? + ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? HSA_CAP_SRAM_EDCSUPPORTED : 0; - dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? + dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? HSA_CAP_MEM_EDCSUPPORTED : 0; if (adev->asic_type != CHIP_VEGA10) - dev->node_props.capability |= (adev->ras_features != 0) ? + dev->node_props.capability |= (adev->ras_enabled != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; /* SVM API and HMM page migration work together, device memory type diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c index f6b1efce450c..8d99c7a5abf8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c @@ -85,7 +85,7 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) return 0; if (state == BACO_STATE_IN) { - if (!ras || !adev->ras_features) { + if (!ras || !adev->ras_enabled) { data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); data |= 0x80000000; WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 72581e43d83e..a06e6865507d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1531,7 +1531,7 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) NULL); break; default: - if (!ras || !adev->ras_features || + if (!ras || !adev->ras_enabled || adev->gmc.xgmi.pending_reset) { if (adev->asic_type == CHIP_ARCTURUS) { data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT); -- cgit v1.2.3-70-g09d2 From 011907fda3605177f4fc05bf08fcf0fceaabda49 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Mon, 10 May 2021 11:04:59 +0800 Subject: drm/amdgpu: covert ras status to kernel errno The original codes use ras status and kernl errno together in the same function, which is a wrong code style. Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 29 ++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 ++++++--------------------------- 2 files changed, 34 insertions(+), 38 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3179ca9fc03f..f7bbb04d01ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1114,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; } +static int psp_ras_status_to_errno(struct amdgpu_device *adev, + enum ta_ras_status ras_status) +{ + int ret = -EINVAL; + + switch (ras_status) { + case TA_RAS_STATUS__SUCCESS: + ret = 0; + break; + case TA_RAS_STATUS__RESET_NEEDED: + ret = -EAGAIN; + break; + case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: + dev_warn(adev->dev, "RAS WARN: ras function unavailable\n"); + break; + case TA_RAS_STATUS__ERROR_ASD_READ_WRITE: + dev_warn(adev->dev, "RAS WARN: asd read or write failed\n"); + break; + default: + dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret); + } + + return ret; +} + int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { @@ -1137,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp, if (ret) return -EINVAL; - return ras_cmd->ras_status; + return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status); } static int psp_ras_terminate(struct psp_context *psp) @@ -1220,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - return ras_cmd->ras_status; + return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status); } // ras end diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a94be181d066..4eebb97994d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -586,29 +586,6 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, } /* obj end */ -static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, - const char* invoke_type, - const char* block_name, - enum ta_ras_status ret) -{ - switch (ret) { - case TA_RAS_STATUS__SUCCESS: - return; - case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: - dev_warn(adev->dev, - "RAS WARN: %s %s currently unavailable\n", - invoke_type, - block_name); - break; - default: - dev_err(adev->dev, - "RAS ERROR: %s %s error failed ret 0x%X\n", - invoke_type, - block_name, - ret); - } -} - /* feature ctl begin */ static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, struct ras_common_if *head) @@ -703,15 +680,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { - amdgpu_ras_parse_status_code(adev, - enable ? "enable":"disable", - ras_block_str(head->block), - (enum ta_ras_status)ret); - if (ret == TA_RAS_STATUS__RESET_NEEDED) - ret = -EAGAIN; - else - ret = -EINVAL; - + dev_err(adev->dev, "ras %s %s failed %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); goto out; } } @@ -1056,10 +1028,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ret = -EINVAL; } - amdgpu_ras_parse_status_code(adev, - "inject", - ras_block_str(info->head.block), - (enum ta_ras_status)ret); + if (ret) + dev_err(adev->dev, "ras inject %s failed %d\n", + ras_block_str(info->head.block), ret); return ret; } -- cgit v1.2.3-70-g09d2