diff options
author | Eric Huang <jinhuieric.huang@amd.com> | 2024-06-03 11:56:03 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-06-05 11:25:13 -0400 |
commit | 2656e1ce783a90fa1aa0e11f2915d7c0442bf06f (patch) | |
tree | ebf462a089e9fde96400113c548b1b4f5f7435fc | |
parent | 3e538e43222c37de60cefa6e662beb8b30297300 (diff) |
drm/amdgpu: add reset sources in gpu reset context
The reset source (or reset cause) is very useful information
for the reset context; it will be used by the events API.
Suggested-by: Lijo Lazar <Lijo.Lazar@amd.com>
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 34 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 13 |
2 files changed, 47 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index bfdde772b7ee..9deb41d61e8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -160,3 +160,37 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
 	atomic_set(&reset_domain->in_gpu_reset, 0);
 	up_write(&reset_domain->sem);
 }
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+			   size_t len)
+{
+	struct amdgpu_ring *ring;
+
+	if (!buf || !len)
+		return;
+
+	switch (rst_ctxt->src) {
+	case AMDGPU_RESET_SRC_JOB:
+		if (rst_ctxt->job) {
+			ring = amdgpu_job_ring(rst_ctxt->job);
+			snprintf(buf, len, "job hang on ring:%s", ring->name);
+		} else {
+			strscpy(buf, "job hang", len);
+		}
+		break;
+	case AMDGPU_RESET_SRC_RAS:
+		strscpy(buf, "RAS error", len);
+		break;
+	case AMDGPU_RESET_SRC_MES:
+		strscpy(buf, "MES hang", len);
+		break;
+	case AMDGPU_RESET_SRC_HWS:
+		strscpy(buf, "HWS hang", len);
+		break;
+	case AMDGPU_RESET_SRC_USER:
+		strscpy(buf, "user trigger", len);
+		break;
+	default:
+		strscpy(buf, "unknown", len);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 5a9cc043b858..4ae581f3fcb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -36,6 +36,15 @@ enum AMDGPU_RESET_FLAGS {
 	AMDGPU_HOST_FLR = 3,
 };
 
+enum AMDGPU_RESET_SRCS {
+	AMDGPU_RESET_SRC_UNKNOWN,
+	AMDGPU_RESET_SRC_JOB,
+	AMDGPU_RESET_SRC_RAS,
+	AMDGPU_RESET_SRC_MES,
+	AMDGPU_RESET_SRC_HWS,
+	AMDGPU_RESET_SRC_USER,
+};
+
 struct amdgpu_reset_context {
 	enum amd_reset_method method;
 	struct amdgpu_device *reset_req_dev;
@@ -43,6 +52,7 @@ struct amdgpu_reset_context {
 	struct amdgpu_hive_info *hive;
 	struct list_head *reset_device_list;
 	unsigned long flags;
+	enum AMDGPU_RESET_SRCS src;
 };
 
 struct amdgpu_reset_handler {
@@ -130,6 +140,9 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+			   size_t len);
+
 #define for_each_handler(i, handler, reset_ctl) \
 	for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
 	     (handler = (*reset_ctl->reset_handlers)[i]); \