diff options
author | Jiadong Zhu <Jiadong.Zhu@amd.com> | 2024-07-02 09:17:14 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-09-02 11:40:47 -0400 |
commit | 097af47d3cfb99ee02afbdd7e0d9596eb012c65a (patch) | |
tree | cc05a600f44479556519904f19b32a851bc09a69 | |
parent | 2f3806f781421ce6dfa60471eb2116627c0eb893 (diff) |
drm/amdgpu/gfx10: wait for reset done before remap
There is a race condition where the CP firmware modifies
the MQD during the reset sequence after the driver has
updated it for remapping. We have to wait until
CP_HQD_ACTIVE becomes false before remapping the queue.
v2: fix KIQ locking (Alex)
v3: fix KIQ locking harder (Jessie)
Acked-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 41 |
1 file changed, 30 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8038b62feb84..ad113fb67a85 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9475,7 +9475,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r; + int i, r; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -9483,22 +9483,42 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { - r = -ENOMEM; - goto out; + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; } kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); if (r) - goto out; + return r; + + /* make sure dequeue is complete*/ + gfx_v10_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v10_0_unset_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + return r; + } r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) { dev_err(adev->dev, "fail to resv mqd_obj\n"); - goto out; + return r; } r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { @@ -9509,20 +9529,19 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, amdgpu_bo_unreserve(ring->mqd_obj); if (r) { dev_err(adev->dev, "fail to unresv mqd_obj\n"); - goto out; + return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); if 
(amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) { - r = -ENOMEM; - goto out; + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); - -out: - spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; |