From 38d4e4638e85ae52bec2c33869b2131e24d49229 Mon Sep 17 00:00:00 2001 From: Jingwen Chen Date: Fri, 22 Oct 2021 11:30:01 +0800 Subject: drm/amd/amdgpu: fix bad job hw_fence use after free in advance tdr [Why] In advance tdr mode, the real bad job will be resubmitted twice, while in drm_sched_resubmit_jobs_ext, there's a dma_fence_put, so the bad job is put one more time than other jobs. [How] Adding dma_fence_get before resbumit job in amdgpu_device_recheck_guilty_jobs and put the fence for normal jobs Signed-off-by: Jingwen Chen Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6e40cc1bc6dc..7c3f6ee14853 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4850,6 +4850,9 @@ static void amdgpu_device_recheck_guilty_jobs( /* clear job's guilty and depend the folowing step to decide the real one */ drm_sched_reset_karma(s_job); + /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get + * to make sure fence is balanced */ + dma_fence_get(s_job->s_fence->parent); drm_sched_resubmit_jobs_ext(&ring->sched, 1); ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); @@ -4885,6 +4888,7 @@ retry: /* got the hw fence, signal finished fence */ atomic_dec(ring->sched.score); + dma_fence_put(s_job->s_fence->parent); dma_fence_get(&s_job->s_fence->finished); dma_fence_signal(&s_job->s_fence->finished); dma_fence_put(&s_job->s_fence->finished); -- cgit v1.2.3-70-g09d2