From b205795677c034a1975e75a466ad158950fde4b4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 11 Jun 2020 23:19:37 -0400 Subject: drm/amdkfd: Add eviction debug messages Use WARN to print messages with backtrace when evictions are triggered. This can help determine the root cause of evictions and help spot driver bugs triggering evictions unintentionally, or help with performance tuning by avoiding conditions that cause evictions in a specific workload. The messages are controlled by a new module parameter that can be changed at runtime: echo Y > /sys/module/amdgpu/parameters/debug_evictions echo N > /sys/module/amdgpu/parameters/debug_evictions Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b87ca171986a..072f0e1185a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, continue; } + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, + "Adding eviction fence to sync obj"); r = amdgpu_sync_fence(sync, f, false); if (r) break; -- cgit v1.2.3-70-g09d2 From 174b328bc89b5fa9cfec57831670e8100a1e0da0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 27 May 2020 10:31:08 +0200 Subject: drm/amdgpu: remove distinction between explicit and implicit sync (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Marek a pipeline sync should be inserted for implicit syncs well. v2: bump the driver version Signed-off-by: Christian König Tested-by: Marek Olšák Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 12 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 15 +++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 31 +++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 2 +- 9 files changed, 33 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 68e6e1bc8f3a..c408936e8f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -395,7 +395,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(sync, vm->last_update, false); + return amdgpu_sync_fence(sync, vm->last_update); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -785,7 +785,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(sync, bo_va->last_pt_update, false); + amdgpu_sync_fence(sync, bo_va->last_pt_update); return 0; } @@ -804,7 +804,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); + return amdgpu_sync_fence(sync, bo_va->last_pt_update); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -2102,7 +2102,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 19070226a945..ffbcaf4bfb8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -992,7 +992,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence); dma_fence_put(fence); if (r) return r; @@ -1014,7 +1014,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence); dma_fence_put(fence); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8db76677615c..2f12f3ae3c7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -87,9 +87,10 @@ * - 3.36.0 - Allow reading more status registers on si/cik * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC + * - 3.39.0 - DMABUF implicit sync does a full pipeline sync */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 38 +#define KMS_DRIVER_MINOR 39 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index b91853fd66d3..4ffc32b78745 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -178,7 +178,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index fe92dcd94d4a..267fa45ddb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, int r; if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(sync, ring->vmid_wait, false); + return amdgpu_sync_fence(sync, ring->vmid_wait); fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) @@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(sync, &array->base, false); + r = amdgpu_sync_fence(sync, &array->base); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; return r; @@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp, false); + r = amdgpu_sync_fence(sync, tmp); return r; } needs_flush = true; @@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence); if (r) return r; @@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence); if (r) return r; @@ -435,7 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(&id->active, fence, false); + r = amdgpu_sync_fence(&id->active, fence); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 47207188c569..2975c4a6e581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -183,16 +183,13 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; struct dma_fence *fence; - bool explicit = false; int r; - fence = amdgpu_sync_get_fence(&job->sync, &explicit); - if (fence && explicit) { - if (drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(&job->sched_sync, fence, false); - if (r) - DRM_ERROR("Error adding fence (%d)\n", r); - } + fence = amdgpu_sync_get_fence(&job->sync); + if (fence && drm_sched_dependency_optimized(fence, s_entity)) { + r = amdgpu_sync_fence(&job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence (%d)\n", r); } while (fence == NULL && vm && !job->vmid) { @@ -202,7 +199,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - fence = amdgpu_sync_get_fence(&job->sync, NULL); + fence = amdgpu_sync_get_fence(&job->sync); } return fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 072f0e1185a8..8ea6c49529e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -35,7 +35,6 @@ struct amdgpu_sync_entry { struct hlist_node node; struct dma_fence *fence; - bool explicit; }; static struct kmem_cache *amdgpu_sync_slab; @@ -129,8 +128,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; @@ -139,10 +137,6 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, continue; amdgpu_sync_keep_later(&e->fence, f); - - /* Preserve eplicit flag to not loose pipe line sync */ - e->explicit |= explicit; - return true; } return false; @@ -153,27 +147,23 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, * * @sync: sync object to add fence to * @f: fence to sync to - * @explicit: if this is an explicit dependency * * Add the fence to the sync object. */ -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; if (!f) return 0; - if (amdgpu_sync_add_later(sync, f, explicit)) + if (amdgpu_sync_add_later(sync, f)) return 0; e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) return -ENOMEM; - e->explicit = explicit; - hash_add(sync->fences, &e->node, f->context); e->fence = dma_fence_get(f); return 0; @@ -194,7 +184,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) return 0; amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence, false); + return amdgpu_sync_fence(sync, fence); } /** @@ -221,7 +211,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* always sync to the exclusive fence */ f = dma_resv_get_excl(resv); - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); flist = dma_resv_get_list(resv); if (!flist || r) @@ -237,7 +227,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* Always sync to moves, no matter what */ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) { - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); if (r) break; } @@ -277,7 +267,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, "Adding eviction fence to sync obj"); - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); if (r) break; } @@ -332,11 +322,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, * amdgpu_sync_get_fence - get the next fence from the sync object * * @sync: sync object to use - * @explicit: true if the next fence is explicit * * Get and removes the next fence from the sync object not signaled yet. */ -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit) +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; @@ -345,8 +334,6 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; - if (explicit) - *explicit = e->explicit; hash_del(&e->node); kmem_cache_free(amdgpu_sync_slab, e); @@ -378,7 +365,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(clone, f, e->explicit); + r = amdgpu_sync_fence(clone, f); if (r) return r; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index cfbe5788b8b9..7c0fe20c470d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -47,16 +47,14 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, - bool *explicit); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 8d9c6feba660..28bdfb3ac33d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -208,7 +208,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, int r; /* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false); + r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); if (r) return r; -- cgit v1.2.3-70-g09d2