From 18f36157f2cbb1cfc7d67642c40deddbcece0553 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 21 Feb 2020 21:43:44 +0800 Subject: drm/amdgpu: add helper funcs to detect PCS error Since from vega20, hardware supports run-time detect and report XGMI/WAFL PCS ras error. Add helper functions to walkthrough every type of ras error and report it if any. Signed-off-by: Hawking Zhang Reviewed-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 2aa61adee459..4a92067fe595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -37,6 +37,12 @@ struct amdgpu_hive_info { struct task_barrier tb; }; +struct amdgpu_pcs_ras_field { + const char *err_name; + uint32_t pcs_err_mask; + uint32_t pcs_err_shift; +}; + struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); @@ -48,6 +54,8 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev); void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev); uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr); +int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) -- cgit v1.2.3-70-g09d2