summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Huang <jinhuieric.huang@amd.com>2024-06-03 11:56:03 -0400
committerAlex Deucher <alexander.deucher@amd.com>2024-06-05 11:25:13 -0400
commit2656e1ce783a90fa1aa0e11f2915d7c0442bf06f (patch)
treeebf462a089e9fde96400113c548b1b4f5f7435fc
parent3e538e43222c37de60cefa6e662beb8b30297300 (diff)
drm/amdgpu: add reset sources in gpu reset context
reset source or reset cause is very useful info for reset context, it will be used by events API. Suggested-by: Lijo Lazar <Lijo.Lazar@amd.com> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h13
2 files changed, 47 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index bfdde772b7ee..9deb41d61e8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -160,3 +160,37 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
atomic_set(&reset_domain->in_gpu_reset, 0);
up_write(&reset_domain->sem);
}
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len)
+{
+ struct amdgpu_ring *ring;
+
+ if (!buf || !len)
+ return;
+
+ switch (rst_ctxt->src) {
+ case AMDGPU_RESET_SRC_JOB:
+ if (rst_ctxt->job) {
+ ring = amdgpu_job_ring(rst_ctxt->job);
+ snprintf(buf, len, "job hang on ring:%s", ring->name);
+ } else {
+ strscpy(buf, "job hang", len);
+ }
+ break;
+ case AMDGPU_RESET_SRC_RAS:
+ strscpy(buf, "RAS error", len);
+ break;
+ case AMDGPU_RESET_SRC_MES:
+ strscpy(buf, "MES hang", len);
+ break;
+ case AMDGPU_RESET_SRC_HWS:
+ strscpy(buf, "HWS hang", len);
+ break;
+ case AMDGPU_RESET_SRC_USER:
+ strscpy(buf, "user trigger", len);
+ break;
+ default:
+ strscpy(buf, "unknown", len);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 5a9cc043b858..4ae581f3fcb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -36,6 +36,15 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_HOST_FLR = 3,
};
+enum AMDGPU_RESET_SRCS {
+ AMDGPU_RESET_SRC_UNKNOWN,
+ AMDGPU_RESET_SRC_JOB,
+ AMDGPU_RESET_SRC_RAS,
+ AMDGPU_RESET_SRC_MES,
+ AMDGPU_RESET_SRC_HWS,
+ AMDGPU_RESET_SRC_USER,
+};
+
struct amdgpu_reset_context {
enum amd_reset_method method;
struct amdgpu_device *reset_req_dev;
@@ -43,6 +52,7 @@ struct amdgpu_reset_context {
struct amdgpu_hive_info *hive;
struct list_head *reset_device_list;
unsigned long flags;
+ enum AMDGPU_RESET_SRCS src;
};
struct amdgpu_reset_handler {
@@ -130,6 +140,9 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len);
+
#define for_each_handler(i, handler, reset_ctl) \
for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
(handler = (*reset_ctl->reset_handlers)[i]); \