From 3612702852acbded39233b1600c8d9f47e40139f Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 5 Jun 2024 18:48:55 -0400 Subject: drm/amdgpu: Add missing locking for MES API calls Add missing locking at a few places when calling MES APIs to ensure exclusive access to MES queue. Signed-off-by: Mukul Joshi Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 62edf6328566..df6c067b1dc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -801,7 +801,9 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); queue_input.wptr_addr = ring->wptr_gpu_addr; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to map legacy queue\n"); @@ -824,7 +826,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, queue_input.trail_fence_addr = gpu_addr; queue_input.trail_fence_data = seq; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to unmap legacy queue\n"); @@ -845,11 +849,13 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) DRM_ERROR("failed to read reg (0x%x)\n", reg); else val = *(adev->mes.read_val_ptr); + amdgpu_mes_unlock(&adev->mes); error: return val; @@ -871,7 +877,9 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) 
DRM_ERROR("failed to write reg (0x%x)\n", reg); @@ -898,7 +906,9 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reg_write_reg_wait\n"); @@ -923,7 +933,9 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reg_write_reg_wait\n"); -- cgit v1.2.3-70-g09d2 From bf349b036d57950e9822b1d11ba12b8e28fa42d1 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 30 May 2024 22:51:03 +0800 Subject: drm/amdgpu: refine mes firmware loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1: refine mes firmware loading v2: use dev_info instead of DRM_INFO Signed-off-by: Yang Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index df6c067b1dc9..1a65ced60e68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1540,11 +1540,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", - ucode_prefix); - DRM_INFO("try to fall back to %s\n", fw_name); + dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], - fw_name); + "amdgpu/%s_mes.bin", ucode_prefix); } if (r) -- cgit v1.2.3-70-g09d2 From 4d14a7405424cfd0d0b72df30d0e4698805746de Mon 
Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 14 Jun 2024 17:07:58 -0400 Subject: Revert "drm/amdgpu: Add missing locking for MES API calls" This reverts commit 3612702852acbded39233b1600c8d9f47e40139f. This is causing a BUG message during suspend. [ 61.603542] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 [ 61.603550] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2028, name: kworker/u64:14 [ 61.603553] preempt_count: 1, expected: 0 [ 61.603555] RCU nest depth: 0, expected: 0 [ 61.603557] Preemption disabled at: [ 61.603559] [] amdgpu_gfx_disable_kgq+0x61/0x160 [amdgpu] [ 61.603789] CPU: 9 PID: 2028 Comm: kworker/u64:14 Tainted: G W 6.8.0+ #7 [ 61.603795] Workqueue: events_unbound async_run_entry_fn [ 61.603801] Call Trace: [ 61.603803] <TASK> [ 61.603806] dump_stack_lvl+0x37/0x50 [ 61.603811] ? amdgpu_gfx_disable_kgq+0x61/0x160 [amdgpu] [ 61.604007] dump_stack+0x10/0x20 [ 61.604010] __might_resched+0x16f/0x1d0 [ 61.604016] __might_sleep+0x43/0x70 [ 61.604020] mutex_lock+0x1f/0x60 [ 61.604024] amdgpu_mes_unmap_legacy_queue+0x6d/0x100 [amdgpu] [ 61.604226] gfx11_kiq_unmap_queues+0x3dc/0x430 [amdgpu] [ 61.604422] ? 
srso_alias_return_thunk+0x5/0xfbef5 [ 61.604429] amdgpu_gfx_disable_kgq+0x122/0x160 [amdgpu] [ 61.604621] gfx_v11_0_hw_fini+0xda/0x100 [amdgpu] [ 61.604814] gfx_v11_0_suspend+0xe/0x20 [amdgpu] [ 61.605008] amdgpu_device_ip_suspend_phase2+0x135/0x1d0 [amdgpu] [ 61.605175] amdgpu_device_suspend+0xec/0x180 [amdgpu] Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 1a65ced60e68..48a5b0713fed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -801,9 +801,7 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); queue_input.wptr_addr = ring->wptr_gpu_addr; - amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); - amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to map legacy queue\n"); @@ -826,9 +824,7 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, queue_input.trail_fence_addr = gpu_addr; queue_input.trail_fence_data = seq; - amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); - amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to unmap legacy queue\n"); @@ -849,13 +845,11 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) goto error; } - amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) DRM_ERROR("failed to read reg (0x%x)\n", reg); else val = *(adev->mes.read_val_ptr); - amdgpu_mes_unlock(&adev->mes); error: return val; @@ -877,9 +871,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, goto error; } - amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); - 
amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to write reg (0x%x)\n", reg); @@ -906,9 +898,7 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, goto error; } - amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); - amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reg_write_reg_wait\n"); @@ -933,9 +923,7 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, goto error; } - amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); - amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("failed to reg_write_reg_wait\n"); -- cgit v1.2.3-70-g09d2 From 19797687e64b961f7c5aac9cf60951561aec038e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Jun 2024 16:59:22 -0400 Subject: drm/amdgpu: remove amdgpu_mes_fence_wait_polling() No longer used so remove it. Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ------------ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 4 ---- 2 files changed, 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 48a5b0713fed..e499d6ba306b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -32,18 +32,6 @@ #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 #define AMDGPU_ONE_DOORBELL_SIZE 8 -signed long amdgpu_mes_fence_wait_polling(u64 *fence, - u64 wait_seq, - signed long timeout) -{ - - while ((s64)(wait_seq - *fence) > 0 && timeout > 0) { - udelay(2); - timeout -= 2; - } - return timeout > 0 ? 
timeout : 0; -} - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) { return roundup(AMDGPU_ONE_DOORBELL_SIZE * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index df9f0404d842..e11051271f71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -352,10 +352,6 @@ struct amdgpu_mes_funcs { #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) -signed long amdgpu_mes_fence_wait_polling(u64 *fence, - u64 wait_seq, - signed long timeout); - int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); -- cgit v1.2.3-70-g09d2