diff options
author | Yang Wang <kevinyang.wang@amd.com> | 2024-03-13 12:50:43 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-03-20 13:38:13 -0400 |
commit | 9dc57c2adf2c307a672f15b4be17c6c14e37cfb9 (patch) | |
tree | 72fb81dbb81aa04378fd01ddc077f7bae22e8d77 /drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | |
parent | d5586e2f5367829880932c8f057f4df9445056cc (diff) |
drm/amdgpu: add ras event id support
add amdgpu ras event id support to better distinguish different
error information sources in dmesg logs.
the following logs will be identified by event id:
{event_id} interrupt to inform RAS event
{event_id} ACA logs
{event_id} error statistics since the current injection/error query
{event_id} error statistics since GPU load
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 32 |
1 files changed, 18 insertions, 14 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 24ad4b97177b..0734490347db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -210,22 +210,26 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } -static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry, + struct ras_query_context *qctx) { - dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); - dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_STATUS]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_ADDR]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_MISC0]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_IPID]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_SYND]); + u64 event_id = qctx->event_id; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); } -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, 
enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) { struct amdgpu_smuio_mcm_config_info mcm_info; struct ras_err_addr err_addr = {0}; @@ -244,7 +248,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo list_for_each_entry(node, &mca_set.list, node) { entry = &node->entry; - amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx); count = 0; ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); |