Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   570
1 file changed, 308 insertions, 262 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9c85a90be293..663043c8f0f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,45 +31,79 @@
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
+	int r;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size)
-		return -EINVAL;
+	drm_gem_object_put_unlocked(gobj);
+
+	size = amdgpu_bo_size(bo);
+	if (size != PAGE_SIZE || (data->offset + 8) > size) {
+		r = -EINVAL;
+		goto error_unref;
+	}
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		r = -EINVAL;
+		goto error_unref;
+	}
 
 	*offset = data->offset;
 
-	drm_gem_object_put_unlocked(gobj);
+	return 0;
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		amdgpu_bo_unref(&p->uf_entry.robj);
-		return -EINVAL;
-	}
+error_unref:
+	amdgpu_bo_unref(&bo);
+	return r;
+}
+
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_bo_list_in *data)
+{
+	int r;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+
+	r = amdgpu_bo_create_list_entry_array(data, &info);
+	if (r)
+		return r;
+	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+				  &p->bo_list);
+	if (r)
+		goto error_free;
+
+	kvfree(info);
 	return 0;
+
+error_free:
+	if (info)
+		kvfree(info);
+
+	return r;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
 	unsigned size, num_ibs = 0;
@@ -163,6 +197,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 			break;
 
+		case AMDGPU_CHUNK_ID_BO_HANDLES:
+			size = sizeof(struct drm_amdgpu_bo_list_in);
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+				ret = -EINVAL;
+				goto free_partial_kdata;
+			}
+
+			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
+			break;
+
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -183,9 +230,13 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
+
+	/* Use this opportunity to fill in task info for the vm */
+	amdgpu_vm_set_task_info(vm);
+
 	return 0;
 
 free_all_kdata:
@@ -257,7 +308,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 		return;
 	}
 
-	total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
@@ -302,7 +353,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
 	/* Do the same for visible VRAM if half of it is free */
-	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -359,7 +410,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
 	if (p->bytes_moved < p->bytes_moved_threshold) {
-		if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 			 * visible VRAM if we've depleted our allowance to do
@@ -377,11 +428,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	}
 
 retry:
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_bo_placement_from_domain(bo, domain);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
 	p->bytes_moved += ctx.bytes_moved;
-	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 	    amdgpu_bo_in_cpu_visible_vram(bo))
 		p->bytes_moved_vis += ctx.bytes_moved;
 
@@ -408,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
 
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
+		if (bo == validated)
 			break;
 
 		/* We can't move pinned BOs here */
@@ -434,9 +485,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		update_bytes_moved_vis =
-			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_ttm_placement_from_domain(bo, other);
+			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+			amdgpu_bo_in_cpu_visible_vram(bo);
+		amdgpu_bo_placement_from_domain(bo, other);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		p->bytes_moved += ctx.bytes_moved;
 		if (update_bytes_moved_vis)
@@ -479,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
@@ -490,8 +541,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		/* Check if we have user pages and nobody bound the BO already */
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
 		    lobj->user_pages) {
-			amdgpu_ttm_placement_from_domain(bo,
-							 AMDGPU_GEM_DOMAIN_CPU);
+			amdgpu_bo_placement_from_domain(bo,
+							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
@@ -519,32 +570,46 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
-	unsigned i, tries = 10;
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
+	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-	if (p->bo_list) {
-		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+	if (cs->in.bo_list_handle) {
+		if (p->bo_list)
+			return -EINVAL;
+
+		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+				       &p->bo_list);
+		if (r)
+			return r;
+	} else if (!p->bo_list) {
+		/* Create a empty bo_list when no handle is provided */
+		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+					  &p->bo_list);
+		if (r)
+			return r;
 	}
 
+	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+	if (p->bo_list->first_userptr != p->bo_list->num_entries)
+		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {
 		struct list_head need_pages;
-		unsigned i;
 
 		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 					   &duplicates);
@@ -554,17 +619,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_free_pages;
 		}
 
-		/* Without a BO list we don't have userptr BOs */
-		if (!p->bo_list)
-			break;
-
 		INIT_LIST_HEAD(&need_pages);
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo;
-
-			e = &p->bo_list->array[i];
-			bo = e->robj;
+		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -583,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				list_del(&e->tv.head);
 				list_add(&e->tv.head, &need_pages);
 
-				amdgpu_bo_unreserve(e->robj);
+				amdgpu_bo_unreserve(bo);
 			}
 		}
 
@@ -602,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
+			struct ttm_tt *ttm = e->tv.bo->ttm;
 
 			e->user_pages = kvmalloc_array(ttm->num_pages,
 						       sizeof(struct page*),
@@ -656,39 +713,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
 
-	if (p->bo_list) {
-		struct amdgpu_vm *vm = &fpriv->vm;
-		unsigned i;
-
-		gds = p->bo_list->gds_obj;
-		gws = p->bo_list->gws_obj;
-		oa = p->bo_list->oa_obj;
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+	gds = p->bo_list->gds_obj;
+	gws = p->bo_list->gws_obj;
+	oa = p->bo_list->oa_obj;
 
-			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
-		}
-	} else {
-		gds = p->adev->gds.gds_gfx_bo;
-		gws = p->adev->gds.gws_gfx_bo;
-		oa = p->adev->gds.oa_gfx_bo;
-	}
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
 	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 	}
 	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -700,18 +746,12 @@ error_validate:
 
 error_free_pages:
 
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			e = &p->bo_list->array[i];
-
-			if (!e->user_pages)
-				continue;
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		if (!e->user_pages)
+			continue;
 
-			release_pages(e->user_pages,
-				      e->robj->tbo.ttm->num_pages);
-			kvfree(e->user_pages);
-		}
+		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
+		kvfree(e->user_pages);
 	}
 
 	return r;
@@ -723,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct reservation_object *resv = bo->tbo.resv;
+
 		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+				     amdgpu_bo_explicit_sync(bo));
 
 		if (r)
 			return r;
@@ -768,108 +810,26 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
-}
-
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_device *adev = p->adev;
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_bo_va *bo_va;
-	struct amdgpu_bo *bo;
-	int i, r;
-
-	r = amdgpu_vm_clear_freed(adev, vm, NULL);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync,
-			      fpriv->prt_va->last_pt_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
-		bo_va = fpriv->csa_va;
-		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
 
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
+		amdgpu_bo_unref(&uf);
 	}
-
-	if (p->bo_list) {
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct dma_fence *f;
-
-			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
-				continue;
-
-			bo_va = p->bo_list->array[i].bo_va;
-			if (bo_va == NULL)
-				continue;
-
-			r = amdgpu_vm_bo_update(adev, bo_va, false);
-			if (r)
-				return r;
-
-			f = bo_va->last_pt_update;
-			r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-			if (r)
-				return r;
-		}
-
-	}
-
-	r = amdgpu_vm_handle_moved(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_vm_debug && p->bo_list) {
-		/* Invalidate all BOs to test for userspace bugs */
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, bo, false);
-		}
-	}
-
-	return r;
 }
 
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->job->ring;
+	struct amdgpu_bo_list_entry *e;
+	struct amdgpu_bo_va *bo_va;
+	struct amdgpu_bo *bo;
 	int r;
 
 	/* Only for UVD/VCE VM emulation */
-	if (p->job->ring->funcs->parse_cs) {
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 		unsigned i, j;
 
 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -888,7 +848,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 			if (r) {
 				DRM_ERROR("IB va_start is invalid\n");
@@ -910,29 +870,110 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += va_start - offset;
 
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
+			if (ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
 
-			r = amdgpu_ring_parse_cs(ring, p, j);
-			if (r)
-				return r;
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
 
 			j++;
 		}
 	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
+
+
+	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_fence(adev, &p->job->sync,
+			      fpriv->prt_va->last_pt_update, false);
+	if (r)
+		return r;
+
+	if (amdgpu_sriov_vf(adev)) {
+		struct dma_fence *f;
 
-		r = amdgpu_bo_vm_update_pte(p);
+		bo_va = fpriv->csa_va;
+		BUG_ON(!bo_va);
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
 			return r;
 
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		f = bo_va->last_pt_update;
+		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
 		if (r)
 			return r;
 	}
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_fence *f;
+
+		/* ignore duplicates */
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
+		if (!bo)
+			continue;
+
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+
+		f = bo_va->last_pt_update;
+		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vm_handle_moved(adev, vm);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_update_directories(adev, vm);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+	if (r)
+		return r;
+
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+	if (amdgpu_vm_debug) {
+		/* Invalidate all BOs to test for userspace bugs */
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+			/* ignore duplicates */
+			if (!bo)
+				continue;
+
+			amdgpu_vm_bo_invalidate(adev, bo, false);
+		}
+	}
+
 	return amdgpu_cs_sync_rings(p);
 }
 
@@ -941,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -970,27 +1012,24 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
 		if (r)
 			return r;
 
-		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
-			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
-			if (!parser->ctx->preamble_presented) {
-				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
-				parser->ctx->preamble_presented = true;
-			}
-		}
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->job->preamble_status |=
+				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->job->ring && parser->job->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->job->ring = ring;
+		parser->entity = entity;
 
-		r = amdgpu_ib_get(adev, vm,
-				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-				  ib);
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				  chunk_ib->ib_bytes : 0, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -1004,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
 	if (parser->job->uf_addr && (
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1025,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
+		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
@@ -1065,7 +1104,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 {
 	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
+	r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
 	if (r)
 		return r;
 
@@ -1154,71 +1193,80 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 	int i;
 
 	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
-	struct amdgpu_ring *ring = p->job->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct drm_sched_entity *entity = p->entity;
+	enum drm_sched_priority priority;
+	struct amdgpu_ring *ring;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
-	unsigned i;
 	uint64_t seq;
 	int r;
 
-	amdgpu_mn_lock(p->mn);
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-				amdgpu_mn_unlock(p->mn);
-				return -ERESTARTSYS;
-			}
-		}
-	}
-
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
-	if (r) {
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
+	r = drm_sched_job_init(&job->base, entity, p->filp);
+	if (r)
+		goto error_unlock;
+
+	/* No memory allocation is allowed while holding the mn lock */
+	amdgpu_mn_lock(p->mn);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+			r = -ERESTARTSYS;
+			goto error_abort;
+		}
 	}
 
 	job->owner = p->filp;
-	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
+	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+	    !p->ctx->preamble_presented) {
+		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+		p->ctx->preamble_presented = true;
+	}
+
 	cs->out.handle = seq;
 	job->uf_sequence = seq;
 
 	amdgpu_job_free_resources(job);
-	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
 
 	trace_amdgpu_cs_ioctl(job);
+	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
+	ring = to_amdgpu_ring(entity->rq->sched);
+	amdgpu_ring_priority_get(ring, priority);
+
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
+
+error_abort:
+	dma_fence_put(&job->base.s_fence->finished);
+	job->base.s_fence = NULL;
+	amdgpu_mn_unlock(p->mn);
+
+error_unlock:
+	amdgpu_job_free(job);
+	return r;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -1245,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1256,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	reserved_buffers = true;
 
-	r = amdgpu_cs_dependencies(adev, &parser);
-	if (r) {
-		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
-		goto out;
-	}
-
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
@@ -1289,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1300,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1340,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					     struct drm_file *filp,
 					     struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1349,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
@@ -1605,7 +1651,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 
 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
 		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
 		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
 		if (r)
 			return r;
