summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c570
1 files changed, 308 insertions, 262 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9c85a90be293..663043c8f0f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,45 +31,79 @@
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
uint32_t *offset)
{
struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
unsigned long size;
+ int r;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
return -EINVAL;
- p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
- p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+ p->uf_entry.tv.bo = &bo->tbo;
p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL;
- size = amdgpu_bo_size(p->uf_entry.robj);
- if (size != PAGE_SIZE || (data->offset + 8) > size)
- return -EINVAL;
+ drm_gem_object_put_unlocked(gobj);
+
+ size = amdgpu_bo_size(bo);
+ if (size != PAGE_SIZE || (data->offset + 8) > size) {
+ r = -EINVAL;
+ goto error_unref;
+ }
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ r = -EINVAL;
+ goto error_unref;
+ }
*offset = data->offset;
- drm_gem_object_put_unlocked(gobj);
+ return 0;
- if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
- amdgpu_bo_unref(&p->uf_entry.robj);
- return -EINVAL;
- }
+error_unref:
+ amdgpu_bo_unref(&bo);
+ return r;
+}
+
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
+{
+ int r;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+
+ r = amdgpu_bo_create_list_entry_array(data, &info);
+ if (r)
+ return r;
+ r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+ &p->bo_list);
+ if (r)
+ goto error_free;
+
+ kvfree(info);
return 0;
+
+error_free:
+ if (info)
+ kvfree(info);
+
+ return r;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
@@ -163,6 +197,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
+ case AMDGPU_CHUNK_ID_BO_HANDLES:
+ size = sizeof(struct drm_amdgpu_bo_list_in);
+ if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+ ret = -EINVAL;
+ goto free_partial_kdata;
+ }
+
+ ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+
+ break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -183,9 +230,13 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_all_kdata;
}
- if (p->uf_entry.robj)
+ if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ amdgpu_vm_set_task_info(vm);
+
return 0;
free_all_kdata:
@@ -257,7 +308,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
return;
}
- total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+ total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
@@ -302,7 +353,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
/* Do the same for visible VRAM if half of it is free */
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -359,7 +410,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
* to move it. Don't move anything if the threshold is zero.
*/
if (p->bytes_moved < p->bytes_moved_threshold) {
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
* visible VRAM if we've depleted our allowance to do
@@ -377,11 +428,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
}
retry:
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
@@ -408,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
p->evictable = list_prev_entry(p->evictable, tv.head)) {
struct amdgpu_bo_list_entry *candidate = p->evictable;
- struct amdgpu_bo *bo = candidate->robj;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool update_bytes_moved_vis;
uint32_t other;
/* If we reached our current BO we can forget it */
- if (candidate->robj == validated)
+ if (bo == validated)
break;
/* We can't move pinned BOs here */
@@ -434,9 +485,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
update_bytes_moved_vis =
- adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- amdgpu_bo_in_cpu_visible_vram(bo);
- amdgpu_ttm_placement_from_domain(bo, other);
+ !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_bo_in_cpu_visible_vram(bo);
+ amdgpu_bo_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
@@ -479,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
int r;
list_for_each_entry(lobj, validated, tv.head) {
- struct amdgpu_bo *bo = lobj->robj;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
bool binding_userptr = false;
struct mm_struct *usermm;
@@ -490,8 +541,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
/* Check if we have user pages and nobody bound the BO already */
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
lobj->user_pages) {
- amdgpu_ttm_placement_from_domain(bo,
- AMDGPU_GEM_DOMAIN_CPU);
+ amdgpu_bo_placement_from_domain(bo,
+ AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
@@ -519,32 +570,46 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
- unsigned i, tries = 10;
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
+ unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
- p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
- if (p->bo_list) {
- amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+ /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+ if (cs->in.bo_list_handle) {
+ if (p->bo_list)
+ return -EINVAL;
+
+ r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+ &p->bo_list);
+ if (r)
+ return r;
+ } else if (!p->bo_list) {
+ /* Create a empty bo_list when no handle is provided */
+ r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+ &p->bo_list);
+ if (r)
+ return r;
}
+ amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+ if (p->bo_list->first_userptr != p->bo_list->num_entries)
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
- if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+ if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
while (1) {
struct list_head need_pages;
- unsigned i;
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates);
@@ -554,17 +619,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_free_pages;
}
- /* Without a BO list we don't have userptr BOs */
- if (!p->bo_list)
- break;
-
INIT_LIST_HEAD(&need_pages);
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- struct amdgpu_bo *bo;
-
- e = &p->bo_list->array[i];
- bo = e->robj;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
@@ -583,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
list_del(&e->tv.head);
list_add(&e->tv.head, &need_pages);
- amdgpu_bo_unreserve(e->robj);
+ amdgpu_bo_unreserve(bo);
}
}
@@ -602,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
/* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) {
- struct ttm_tt *ttm = e->robj->tbo.ttm;
+ struct ttm_tt *ttm = e->tv.bo->ttm;
e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*),
@@ -656,39 +713,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
- if (p->bo_list) {
- struct amdgpu_vm *vm = &fpriv->vm;
- unsigned i;
-
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
- for (i = 0; i < p->bo_list->num_entries; i++) {
- struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+ gds = p->bo_list->gds_obj;
+ gws = p->bo_list->gws_obj;
+ oa = p->bo_list->oa_obj;
- p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
- }
- } else {
- gds = p->adev->gds.gds_gfx_bo;
- gws = p->adev->gds.gws_gfx_bo;
- oa = p->adev->gds.oa_gfx_bo;
- }
+ amdgpu_bo_list_for_each_entry(e, p->bo_list)
+ e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds);
- p->job->gds_size = amdgpu_bo_size(gds);
+ p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+ p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
}
if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws);
- p->job->gws_size = amdgpu_bo_size(gws);
+ p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
}
if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa);
- p->job->oa_size = amdgpu_bo_size(oa);
+ p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
}
- if (!r && p->uf_entry.robj) {
- struct amdgpu_bo *uf = p->uf_entry.robj;
+ if (!r && p->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
r = amdgpu_ttm_alloc_gart(&uf->tbo);
p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -700,18 +746,12 @@ error_validate:
error_free_pages:
- if (p->bo_list) {
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- e = &p->bo_list->array[i];
-
- if (!e->user_pages)
- continue;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ if (!e->user_pages)
+ continue;
- release_pages(e->user_pages,
- e->robj->tbo.ttm->num_pages);
- kvfree(e->user_pages);
- }
+ release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
+ kvfree(e->user_pages);
}
return r;
@@ -723,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
int r;
list_for_each_entry(e, &p->validated, tv.head) {
- struct reservation_object *resv = e->robj->tbo.resv;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ struct reservation_object *resv = bo->tbo.resv;
+
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
- amdgpu_bo_explicit_sync(e->robj));
+ amdgpu_bo_explicit_sync(bo));
if (r)
return r;
@@ -768,108 +810,26 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
kfree(parser->chunks);
if (parser->job)
amdgpu_job_free(parser->job);
- amdgpu_bo_unref(&parser->uf_entry.robj);
-}
-
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
-{
- struct amdgpu_device *adev = p->adev;
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo *bo;
- int i, r;
-
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
- if (r)
- return r;
-
- r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
- if (r)
- return r;
-
- r = amdgpu_sync_fence(adev, &p->job->sync,
- fpriv->prt_va->last_pt_update, false);
- if (r)
- return r;
-
- if (amdgpu_sriov_vf(adev)) {
- struct dma_fence *f;
-
- bo_va = fpriv->csa_va;
- BUG_ON(!bo_va);
- r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r)
- return r;
+ if (parser->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
- if (r)
- return r;
+ amdgpu_bo_unref(&uf);
}
-
- if (p->bo_list) {
- for (i = 0; i < p->bo_list->num_entries; i++) {
- struct dma_fence *f;
-
- /* ignore duplicates */
- bo = p->bo_list->array[i].robj;
- if (!bo)
- continue;
-
- bo_va = p->bo_list->array[i].bo_va;
- if (bo_va == NULL)
- continue;
-
- r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r)
- return r;
-
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
- if (r)
- return r;
- }
-
- }
-
- r = amdgpu_vm_handle_moved(adev, vm);
- if (r)
- return r;
-
- r = amdgpu_vm_update_directories(adev, vm);
- if (r)
- return r;
-
- r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
- if (r)
- return r;
-
- if (amdgpu_vm_debug && p->bo_list) {
- /* Invalidate all BOs to test for userspace bugs */
- for (i = 0; i < p->bo_list->num_entries; i++) {
- /* ignore duplicates */
- bo = p->bo_list->array[i].robj;
- if (!bo)
- continue;
-
- amdgpu_vm_bo_invalidate(adev, bo, false);
- }
- }
-
- return r;
}
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_ring *ring = p->job->ring;
+ struct amdgpu_bo_list_entry *e;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
int r;
/* Only for UVD/VCE VM emulation */
- if (p->job->ring->funcs->parse_cs) {
+ if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -888,7 +848,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
- va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+ va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
@@ -910,29 +870,110 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+ amdgpu_bo_kunmap(aobj);
- r = amdgpu_ring_parse_cs(ring, p, j);
- if (r)
- return r;
+ r = amdgpu_ring_parse_cs(ring, p, j);
+ if (r)
+ return r;
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
j++;
}
}
- if (p->job->vm) {
- p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+ if (!p->job->vm)
+ return amdgpu_cs_sync_rings(p);
+
+
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(adev, &p->job->sync,
+ fpriv->prt_va->last_pt_update, false);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ struct dma_fence *f;
- r = amdgpu_bo_vm_update_pte(p);
+ bo_va = fpriv->csa_va;
+ BUG_ON(!bo_va);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+ f = bo_va->last_pt_update;
+ r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
if (r)
return r;
}
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_fence *f;
+
+ /* ignore duplicates */
+ bo = ttm_to_amdgpu_bo(e->tv.bo);
+ if (!bo)
+ continue;
+
+ bo_va = e->bo_va;
+ if (bo_va == NULL)
+ continue;
+
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+
+ f = bo_va->last_pt_update;
+ r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vm_handle_moved(adev, vm);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_update_directories(adev, vm);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+ if (r)
+ return r;
+
+ r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+ if (r)
+ return r;
+
+ p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+ if (amdgpu_vm_debug) {
+ /* Invalidate all BOs to test for userspace bugs */
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ /* ignore duplicates */
+ if (!bo)
+ continue;
+
+ amdgpu_vm_bo_invalidate(adev, bo, false);
+ }
+ }
+
return amdgpu_cs_sync_rings(p);
}
@@ -941,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
{
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- int i, j;
int r, ce_preempt = 0, de_preempt = 0;
+ struct amdgpu_ring *ring;
+ int i, j;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
struct amdgpu_ib *ib;
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_ring *ring;
+ struct drm_sched_entity *entity;
chunk = &parser->chunks[i];
ib = &parser->job->ibs[j];
@@ -970,27 +1012,24 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return -EINVAL;
}
- r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring, &ring);
+ r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance, chunk_ib->ring,
+ &entity);
if (r)
return r;
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
- parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
- if (!parser->ctx->preamble_presented) {
- parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
- parser->ctx->preamble_presented = true;
- }
- }
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ parser->job->preamble_status |=
+ AMDGPU_PREAMBLE_IB_PRESENT;
- if (parser->job->ring && parser->job->ring != ring)
+ if (parser->entity && parser->entity != entity)
return -EINVAL;
- parser->job->ring = ring;
+ parser->entity = entity;
- r = amdgpu_ib_get(adev, vm,
- ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
- ib);
+ ring = to_amdgpu_ring(entity->rq->sched);
+ r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -1004,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
/* UVD & VCE fw doesn't support user fences */
+ ring = to_amdgpu_ring(parser->entity->rq->sched);
if (parser->job->uf_addr && (
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+ ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+ return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1025,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (i = 0; i < num_deps; ++i) {
- struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
- deps[i].ip_type,
- deps[i].ip_instance,
- deps[i].ring, &ring);
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
- fence = amdgpu_ctx_get_fence(ctx, ring,
+ fence = amdgpu_ctx_get_fence(ctx, entity,
deps[i].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
@@ -1065,7 +1104,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
{
int r;
struct dma_fence *fence;
- r = drm_syncobj_find_fence(p->filp, handle, &fence);
+ r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
if (r)
return r;
@@ -1154,71 +1193,80 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
- drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+ drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
- struct amdgpu_ring *ring = p->job->ring;
- struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_sched_entity *entity = p->entity;
+ enum drm_sched_priority priority;
+ struct amdgpu_ring *ring;
+ struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
- unsigned i;
uint64_t seq;
int r;
- amdgpu_mn_lock(p->mn);
- if (p->bo_list) {
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- amdgpu_mn_unlock(p->mn);
- return -ERESTARTSYS;
- }
- }
- }
-
job = p->job;
p->job = NULL;
- r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
- if (r) {
- amdgpu_job_free(job);
- amdgpu_mn_unlock(p->mn);
- return r;
+ r = drm_sched_job_init(&job->base, entity, p->filp);
+ if (r)
+ goto error_unlock;
+
+ /* No memory allocation is allowed while holding the mn lock */
+ amdgpu_mn_lock(p->mn);
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+ r = -ERESTARTSYS;
+ goto error_abort;
+ }
}
job->owner = p->filp;
- job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
- r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
- if (r) {
- dma_fence_put(p->fence);
- dma_fence_put(&job->base.s_fence->finished);
- amdgpu_job_free(job);
- amdgpu_mn_unlock(p->mn);
- return r;
- }
-
+ amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
amdgpu_cs_post_dependencies(p);
+ if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ !p->ctx->preamble_presented) {
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ p->ctx->preamble_presented = true;
+ }
+
cs->out.handle = seq;
job->uf_sequence = seq;
amdgpu_job_free_resources(job);
- amdgpu_ring_priority_get(job->ring, job->base.s_priority);
trace_amdgpu_cs_ioctl(job);
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+ priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+ ring = to_amdgpu_ring(entity->rq->sched);
+ amdgpu_ring_priority_get(ring, priority);
+
+ amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
return 0;
+
+error_abort:
+ dma_fence_put(&job->base.s_fence->finished);
+ job->base.s_fence = NULL;
+ amdgpu_mn_unlock(p->mn);
+
+error_unlock:
+ amdgpu_job_free(job);
+ return r;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -1245,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
+ r = amdgpu_cs_dependencies(adev, &parser);
+ if (r) {
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ goto out;
+ }
+
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
@@ -1256,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
reserved_buffers = true;
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r) {
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- goto out;
- }
-
for (i = 0; i < parser.job->num_ibs; i++)
trace_amdgpu_cs(&parser, i);
- r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+ r = amdgpu_cs_vm_handling(&parser);
if (r)
goto out;
@@ -1289,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
union drm_amdgpu_wait_cs *wait = data;
- struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
- struct amdgpu_ring *ring = NULL;
+ struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
long r;
@@ -1300,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
- wait->in.ip_type, wait->in.ip_instance,
- wait->in.ring, &ring);
+ r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+ wait->in.ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
- fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+ fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
if (IS_ERR(fence))
r = PTR_ERR(fence);
else if (fence) {
@@ -1340,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_fence *user)
{
- struct amdgpu_ring *ring;
+ struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
int r;
@@ -1349,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
if (ctx == NULL)
return ERR_PTR(-EINVAL);
- r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
- user->ip_instance, user->ring, &ring);
+ r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+ user->ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return ERR_PTR(r);
}
- fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+ fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
amdgpu_ctx_put(ctx);
return fence;
@@ -1605,7 +1651,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;