summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorZhigang Luo <Zhigang.Luo@amd.com>2024-03-18 14:13:10 -0400
committerAlex Deucher <alexander.deucher@amd.com>2024-04-09 22:14:03 -0400
commitdfb15c4ab58658aaa6161b546e7eb852ae7cc132 (patch)
tree0e022949559657a6062573e9c496fa0a9f4d68b7 /drivers/gpu
parent24c30a7b12b148cb9002d5b73c4c229138e39eb2 (diff)
amd/amdkfd: sync all devices to wait all processes being evicted
If there are more than one device doing reset in parallel, the first device will call kfd_suspend_all_processes() to evict all processes on all devices, this call takes time to finish. other device will start reset and recover without waiting. if the process has not been evicted before doing recover, it will be restored, then caused page fault. Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c17
1 files changed, 6 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 041ec3de55e7..719d6d365e15 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
@@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;