diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 138 |
1 files changed, 95 insertions, 43 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d6daf8d2bfa..41fbc4fd0fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -44,6 +44,7 @@ * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29) /* * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB @@ -117,11 +118,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) return; si_meminfo(&si); - mem = si.freeram - si.freehigh; + mem = si.totalram - si.totalhigh; mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6); + if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT) + kfd_mem_limit.max_system_mem_limit >>= 1; + else + kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT; + kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT; pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), @@ -419,6 +425,32 @@ validate_fail: return ret; } +static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + int ret = amdgpu_bo_reserve(bo, false); + + if (ret) + return ret; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) + goto unreserve_out; + + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + if (ret) + goto unreserve_out; + + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + +unreserve_out: + amdgpu_bo_unreserve(bo); + + return ret; +} + static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); @@ -733,7 +765,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, enum dma_data_direction dir; if (unlikely(!ttm->sg)) { - pr_err("SG Table of BO is UNEXPECTEDLY NULL"); + pr_debug("SG Table of BO is NULL"); return; } @@ -828,6 +860,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + struct amdgpu_bo_va *bo_va; bool same_hive = false; int i, ret; @@ -866,9 +899,10 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) || - same_hive) { + (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the - * local hive, or userptr mapping can reuse dma map + * local hive, or userptr, or GTT mapping can reuse dma map * address space share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; @@ -914,7 +948,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("Unable to reserve BO during memory attach"); goto unwind; } - attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + bo_va = amdgpu_vm_bo_find(vm, bo[i]); + if (!bo_va) + bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + else + ++bo_va->ref_count; + attachment[i]->bo_va = bo_va; amdgpu_bo_unreserve(bo[i]); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; @@ -938,7 +977,8 @@ unwind: continue; if (attachment[i]->bo_va) { amdgpu_bo_reserve(bo[i], true); - amdgpu_vm_bo_del(adev, attachment[i]->bo_va); + if (--attachment[i]->bo_va->ref_count == 0) + amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); list_del(&attachment[i]->list); } @@ -955,7 +995,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment) pr_debug("\t remove VA 0x%llx in entry %p\n", attachment->va, attachment); - amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); + if (--attachment->bo_va->ref_count == 0) + amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); drm_gem_object_put(&bo->tbo.base); list_del(&attachment->list); kfree(attachment); @@ -1103,7 +1144,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, if (unlikely(ret)) goto error; - ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base); + ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1); drm_exec_retry_on_contention(&ctx->exec); if (unlikely(ret)) goto error; @@ -1201,8 +1242,6 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); - - kfd_mem_dmaunmap_attachment(mem, entry); } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -1257,6 +1296,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, update_gpuvm_pte_failed: unmap_bo_from_gpuvm(mem, entry, sync); + kfd_mem_dmaunmap_attachment(mem, entry); return ret; } @@ -1690,6 +1730,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT; if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; @@ -1768,6 +1810,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + } else { + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_validate_bo; } if (offset) @@ -1777,6 +1828,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: +err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: @@ -1850,18 +1902,16 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (unlikely(ret)) return ret; - /* The eviction fence should be removed by the last unmap. - * TODO: Log an error condition if the bo still has the eviction fence - * attached - */ amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue)); /* Remove from VM internal data structures */ - list_for_each_entry_safe(entry, tmp, &mem->attachments, list) + list_for_each_entry_safe(entry, tmp, &mem->attachments, list) { + kfd_mem_dmaunmap_attachment(mem, entry); kfd_mem_detach(entry); + } ret = unreserve_bo_and_vms(&ctx, false, false); @@ -1980,19 +2030,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out_unreserve; - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - /* Validate BO only once. The eviction fence gets added to BO - * the first time it is mapped. Validate will wait for all - * background evictions to complete. - */ - ret = amdgpu_amdkfd_bo_validate(bo, domain, true); - if (ret) { - pr_debug("Validate failed\n"); - goto out_unreserve; - } - } - list_for_each_entry(entry, &mem->attachments, list) { if (entry->bo_va->base.vm != avm || entry->is_mapped) continue; @@ -2019,10 +2056,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( mem->mapped_to_gpu_memory); } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - dma_resv_add_fence(bo->tbo.base.resv, - &avm->process_info->eviction_fence->base, - DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2035,11 +2068,27 @@ out: return ret; } +void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +{ + struct kfd_mem_attachment *entry; + struct amdgpu_vm *vm; + + vm = drm_priv_to_vm(drm_priv); + + mutex_lock(&mem->lock); + + list_for_each_entry(entry, &mem->attachments, list) { + if (entry->bo_va->base.vm == vm) + kfd_mem_dmaunmap_attachment(mem, entry); + } + + mutex_unlock(&mem->lock); +} + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; @@ -2080,15 +2129,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mem->mapped_to_gpu_memory); } - /* If BO is unmapped from all VMs, unfence it. It can be evicted if - * required. - */ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && - !mem->bo->tbo.pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence); - unreserve_out: unreserve_bo_and_vms(&ctx, false, false); out: @@ -2316,8 +2356,20 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, amdgpu_sync_create(&(*mem)->sync); (*mem)->is_imported = true; + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_remove_mem; + return 0; +err_remove_mem: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); err_put_obj: |