diff options
author | Yang Wang <kevinyang.wang@amd.com> | 2024-06-25 14:23:42 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-07-08 16:55:11 -0400 |
commit | 75ac6a250632d2fff62039ae728c842033dceddb (patch) | |
tree | 85bc2a059de2d80b8ee9a89ba83727a4744f997d /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |
parent | e33697141bac18906345ea46533a240f1ad3cd21 (diff) |
drm/amdgpu: refine amdgpu ras event id core code
v1:
- use unified event id to manage ras events
- add a new function amdgpu_ras_query_error_status_with_event() to accept
event type as parameter.
v2:
add a warn log to show the location of function failure
when calling amdgpu_ras_mark_event(). (Tao Zhou)
v3:
change RAS_EVENT_TYPE_ISR to RAS_EVENT_TYPE_FATAL.
v4:
rename amdgpu_ras_get_recovery_event() to
amdgpu_ras_get_fatal_error_event().
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 22 |
1 files changed, 17 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 18d994c98a25..7c20def1c4f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -68,9 +68,15 @@ struct amdgpu_iv_entry; /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) +#define RAS_EVENT_INVALID_ID (BIT_ULL(63)) +#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63))) + #define RAS_EVENT_LOG(adev, id, fmt, ...) \ amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__) +#define amdgpu_ras_mark_ras_event(adev, type) \ + (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0))) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -427,20 +433,25 @@ struct umc_ecc_info { }; enum ras_event_type { - RAS_EVENT_TYPE_INVALID = -1, - RAS_EVENT_TYPE_ISR = 0, + RAS_EVENT_TYPE_INVALID = 0, + RAS_EVENT_TYPE_FATAL, RAS_EVENT_TYPE_COUNT, }; struct ras_event_manager { - atomic64_t seqnos[RAS_EVENT_TYPE_COUNT]; + atomic64_t seqno; + u64 last_seqno[RAS_EVENT_TYPE_COUNT]; }; -struct ras_query_context { +struct ras_event_id { enum ras_event_type type; u64 event_id; }; +struct ras_query_context { + struct ras_event_id evid; +}; + typedef int (*pasid_notify)(struct amdgpu_device *adev, uint16_t pasid, void *data); @@ -947,8 +958,9 @@ void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); -bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); +int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, + const void *caller); int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); |