From 02599bc7f7047f2b316ab499f41d72ca14e3b3d3 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Mon, 20 Dec 2021 17:27:37 -0500 Subject: drm/amd/virt: For SRIOV send GPU reset directly to TDR queue. No need to to trigger another work queue inside the work queue. v3: Problem: Extra reset caused by host side FLR notification following guest side triggered reset. Fix: Preven qeuing flr_work from mailbox irq if guest already executing a reset. Suggested-by: Liu Shaoyun Signed-off-by: Andrey Grodzovsky Reviewed-by: Liu Shaoyun Link: https://www.spinics.net/lists/amd-gfx/msg74114.html --- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 477d0dde19c5..5728a6401d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -309,7 +309,7 @@ flr_done: adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover_imp(adev, NULL); } static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -337,8 +337,11 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, switch (event) { case IDH_FLR_NOTIFICATION: - if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.flr_work); + if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) + WARN_ONCE(!queue_work(adev->reset_domain.wq, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); break; /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore * it byfar since that polling thread will handle it, -- cgit v1.2.3-70-g09d2