Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
 -rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c	| 592
 1 file changed, 322 insertions(+), 270 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e80fc38141b5..663043c8f0f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,45 +31,79 @@
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
+	int r;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size)
-		return -EINVAL;
+	drm_gem_object_put_unlocked(gobj);
+
+	size = amdgpu_bo_size(bo);
+	if (size != PAGE_SIZE || (data->offset + 8) > size) {
+		r = -EINVAL;
+		goto error_unref;
+	}
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		r = -EINVAL;
+		goto error_unref;
+	}
 
 	*offset = data->offset;
 
-	drm_gem_object_put_unlocked(gobj);
+	return 0;
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		amdgpu_bo_unref(&p->uf_entry.robj);
-		return -EINVAL;
-	}
+error_unref:
+	amdgpu_bo_unref(&bo);
+	return r;
+}
+
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_bo_list_in *data)
+{
+	int r;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+
+	r = amdgpu_bo_create_list_entry_array(data, &info);
+	if (r)
+		return r;
 
+	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+				  &p->bo_list);
+	if (r)
+		goto error_free;
+
+	kvfree(info);
 	return 0;
+
+error_free:
+	if (info)
+		kvfree(info);
+
+	return r;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
 	unsigned size, num_ibs = 0;
@@ -163,6 +197,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 			break;
 
+		case AMDGPU_CHUNK_ID_BO_HANDLES:
+			size = sizeof(struct drm_amdgpu_bo_list_in);
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+				ret = -EINVAL;
+				goto free_partial_kdata;
+			}
+
+			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
+			break;
+
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -183,9 +230,13 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
+
+	/* Use this opportunity to fill in task info for the vm */
+	amdgpu_vm_set_task_info(vm);
+
 	return 0;
 
 free_all_kdata:
@@ -257,7 +308,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 		return;
 	}
 
-	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
@@ -302,8 +353,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
 	/* Do the same for visible VRAM if half of it is free */
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
-		u64 total_vis_vram = adev->mc.visible_vram_size;
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
+		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 
@@ -346,8 +397,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
-		.allow_reserved_eviction = false,
-		.resv = bo->tbo.resv
+		.resv = bo->tbo.resv,
+		.flags = 0
 	};
 	uint32_t domain;
 	int r;
@@ -359,7 +410,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
 	if (p->bytes_moved < p->bytes_moved_threshold) {
-		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 			 * visible VRAM if we've depleted our allowance to do
@@ -377,13 +428,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	}
 
 retry:
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_bo_placement_from_domain(bo, domain);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
 	p->bytes_moved += ctx.bytes_moved;
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
-	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+	    amdgpu_bo_in_cpu_visible_vram(bo))
 		p->bytes_moved_vis += ctx.bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -409,14 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
 
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved, bytes_moved;
 		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
+		if (bo == validated)
 			break;
 
 		/* We can't move pinned BOs here */
@@ -435,18 +484,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 			continue;
 
 		/* Good we can try to move this BO somewhere else */
-		amdgpu_ttm_placement_from_domain(bo, other);
 		update_bytes_moved_vis =
-			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
-			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
-		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+			amdgpu_bo_in_cpu_visible_vram(bo);
+		amdgpu_bo_placement_from_domain(bo, other);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
-			initial_bytes_moved;
-		p->bytes_moved += bytes_moved;
+		p->bytes_moved += ctx.bytes_moved;
 		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += bytes_moved;
+			p->bytes_moved_vis += ctx.bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -485,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
@@ -496,8 +541,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		/* Check if we have user pages and nobody bound the BO already */
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
 		    lobj->user_pages) {
-			amdgpu_ttm_placement_from_domain(bo,
-							 AMDGPU_GEM_DOMAIN_CPU);
+			amdgpu_bo_placement_from_domain(bo,
+							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
@@ -525,29 +570,46 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
-	unsigned i, tries = 10;
+	struct amdgpu_bo *gds;
+	struct amdgpu_bo *gws;
+	struct amdgpu_bo *oa;
+	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-	if (p->bo_list) {
-		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev);
+	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+	if (cs->in.bo_list_handle) {
+		if (p->bo_list)
+			return -EINVAL;
+
+		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+				       &p->bo_list);
+		if (r)
+			return r;
+	} else if (!p->bo_list) {
+		/* Create a empty bo_list when no handle is provided */
+		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+					  &p->bo_list);
+		if (r)
+			return r;
 	}
 
+	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+	if (p->bo_list->first_userptr != p->bo_list->num_entries)
+		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {
 		struct list_head need_pages;
-		unsigned i;
 
 		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 					   &duplicates);
@@ -557,17 +619,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_free_pages;
 		}
 
-		/* Without a BO list we don't have userptr BOs */
-		if (!p->bo_list)
-			break;
-
 		INIT_LIST_HEAD(&need_pages);
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo;
-
-			e = &p->bo_list->array[i];
-			bo = e->robj;
+		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -586,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 				list_del(&e->tv.head);
 				list_add(&e->tv.head, &need_pages);
-				amdgpu_bo_unreserve(e->robj);
+				amdgpu_bo_unreserve(bo);
 			}
 		}
 
@@ -605,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
+			struct ttm_tt *ttm = e->tv.bo->ttm;
 
 			e->user_pages = kvmalloc_array(ttm->num_pages,
 						       sizeof(struct page*),
@@ -658,35 +712,29 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
 
-	if (p->bo_list) {
-		struct amdgpu_bo *gds = p->bo_list->gds_obj;
-		struct amdgpu_bo *gws = p->bo_list->gws_obj;
-		struct amdgpu_bo *oa = p->bo_list->oa_obj;
-		struct amdgpu_vm *vm = &fpriv->vm;
-		unsigned i;
-
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+	gds = p->bo_list->gds_obj;
+	gws = p->bo_list->gws_obj;
+	oa = p->bo_list->oa_obj;
 
-			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
-		}
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
-		if (gds) {
-			p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-			p->job->gds_size = amdgpu_bo_size(gds);
-		}
-		if (gws) {
-			p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-			p->job->gws_size = amdgpu_bo_size(gws);
-		}
-		if (oa) {
-			p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-			p->job->oa_size = amdgpu_bo_size(oa);
-		}
+	if (gds) {
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+	}
+	if (gws) {
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+	}
+	if (oa) {
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -698,18 +746,12 @@ error_validate:
 
 error_free_pages:
 
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			e = &p->bo_list->array[i];
-
-			if (!e->user_pages)
-				continue;
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		if (!e->user_pages)
+			continue;
 
-			release_pages(e->user_pages,
-				      e->robj->tbo.ttm->num_pages);
-			kvfree(e->user_pages);
-		}
+		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
+		kvfree(e->user_pages);
 	}
 
 	return r;
@@ -721,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct reservation_object *resv = bo->tbo.resv;
+
 		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+				     amdgpu_bo_explicit_sync(bo));
 
 		if (r)
 			return r;
@@ -766,17 +810,88 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
-	struct amdgpu_device *adev = p->adev;
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_bo_va *bo_va;
 	struct amdgpu_bo *bo;
-	int i, r;
+	int r;
+
+	/* Only for UVD/VCE VM emulation */
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
+		unsigned i, j;
+
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			uint64_t offset, va_start;
+			struct amdgpu_ib *ib;
+			uint8_t *kptr;
+
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
+			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += va_start - offset;
+
+			if (ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
+
+			j++;
+		}
+	}
+
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
+
 
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
@@ -806,29 +921,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 			return r;
 	}
 
-	if (p->bo_list) {
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct dma_fence *f;
-
-			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
-				continue;
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_fence *f;
 
-			bo_va = p->bo_list->array[i].bo_va;
-			if (bo_va == NULL)
-				continue;
+		/* ignore duplicates */
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
+		if (!bo)
+			continue;
 
-			r = amdgpu_vm_bo_update(adev, bo_va, false);
-			if (r)
-				return r;
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
 
-			f = bo_va->last_pt_update;
-			r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-			if (r)
-				return r;
-		}
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+
+		f = bo_va->last_pt_update;
+		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		if (r)
+			return r;
 	}
 
 	r = amdgpu_vm_handle_moved(adev, vm);
@@ -843,11 +955,18 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	if (amdgpu_vm_debug && p->bo_list) {
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
-		for (i = 0; i < p->bo_list->num_entries; i++) {
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
 			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
 			if (!bo)
 				continue;
 
@@ -855,78 +974,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 		}
 	}
 
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->job->ring;
-	int r;
-
-	/* Only for UVD/VCE VM emulation */
-	if (p->job->ring->funcs->parse_cs) {
-		unsigned i, j;
-
-		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
-			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			struct amdgpu_cs_chunk *chunk;
-			uint64_t offset, va_start;
-			struct amdgpu_ib *ib;
-			uint8_t *kptr;
-
-			chunk = &p->chunks[i];
-			ib = &p->job->ibs[j];
-			chunk_ib = chunk->kdata;
-
-			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
-				continue;
-
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
-			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += va_start - offset;
-
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-
-			r = amdgpu_ring_parse_cs(ring, p, j);
-			if (r)
-				return r;
-
-			j++;
-		}
-	}
-
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-
-		r = amdgpu_bo_vm_update_pte(p);
-		if (r)
-			return r;
-	}
-
 	return amdgpu_cs_sync_rings(p);
 }
 
@@ -935,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -964,27 +1012,24 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
 		if (r)
 			return r;
 
-		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
-			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
-			if (!parser->ctx->preamble_presented) {
-				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
-				parser->ctx->preamble_presented = true;
-			}
-		}
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->job->preamble_status |=
+				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->job->ring && parser->job->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->job->ring = ring;
+		parser->entity = entity;
 
-		r = amdgpu_ib_get(adev, vm,
-				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-				  ib);
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				  chunk_ib->ib_bytes : 0, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -998,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
 	if (parser->job->uf_addr && (
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1019,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
+		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
@@ -1059,7 +1104,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 {
 	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
+	r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
 	if (r)
 		return r;
 
@@ -1148,71 +1193,80 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 	int i;
 
 	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
-	struct amdgpu_ring *ring = p->job->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct drm_sched_entity *entity = p->entity;
+	enum drm_sched_priority priority;
+	struct amdgpu_ring *ring;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
-	unsigned i;
 	uint64_t seq;
 	int r;
 
-	amdgpu_mn_lock(p->mn);
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-				amdgpu_mn_unlock(p->mn);
-				return -ERESTARTSYS;
-			}
-		}
-	}
-
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
-	if (r) {
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
+	r = drm_sched_job_init(&job->base, entity, p->filp);
+	if (r)
+		goto error_unlock;
+
+	/* No memory allocation is allowed while holding the mn lock */
+	amdgpu_mn_lock(p->mn);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+			r = -ERESTARTSYS;
+			goto error_abort;
+		}
 	}
 
 	job->owner = p->filp;
-	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
+	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+	    !p->ctx->preamble_presented) {
+		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+		p->ctx->preamble_presented = true;
+	}
+
 	cs->out.handle = seq;
 	job->uf_sequence = seq;
 
 	amdgpu_job_free_resources(job);
-	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
 
 	trace_amdgpu_cs_ioctl(job);
+	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
+	ring = to_amdgpu_ring(entity->rq->sched);
+	amdgpu_ring_priority_get(ring, priority);
+
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
+
+error_abort:
+	dma_fence_put(&job->base.s_fence->finished);
+	job->base.s_fence = NULL;
+	amdgpu_mn_unlock(p->mn);
+
+error_unlock:
+	amdgpu_job_free(job);
+	return r;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -1239,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1250,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	reserved_buffers = true;
 
-	r = amdgpu_cs_dependencies(adev, &parser);
-	if (r) {
-		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
-		goto out;
-	}
-
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
@@ -1283,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1294,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1334,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					     struct drm_file *filp,
 					     struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1343,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
@@ -1599,7 +1651,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
 		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
 		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
 		if (r)
 			return r;
