diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 135 |
1 files changed, 90 insertions, 45 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8516c814bc9b..040f4cb6ab2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -32,6 +32,8 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_syncobj.h> +#include <drm/ttm/ttm_tt.h> + #include "amdgpu_cs.h" #include "amdgpu.h" #include "amdgpu_trace.h" @@ -61,6 +63,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_ctx_put(p->ctx); return -ECANCELED; } + + amdgpu_sync_create(&p->sync); return 0; } @@ -108,6 +112,9 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, if (r < 0) return r; + if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type)) + return -EINVAL; + ++(num_ibs[r]); p->gang_leader_idx = r; return 0; @@ -129,9 +136,6 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and two for the CS job */ - p->uf_entry.tv.num_shared = 3; - drm_gem_object_put(gobj); size = amdgpu_bo_size(bo); @@ -188,7 +192,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, uint64_t *chunk_array_user; uint64_t *chunk_array; uint32_t uf_offset = 0; - unsigned int size; + size_t size; int ret; int i; @@ -281,6 +285,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: break; default: @@ -301,7 +306,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } p->gang_leader = p->jobs[p->gang_leader_idx]; - if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { + if (p->ctx->generation != p->gang_leader->generation) { ret = -ECANCELED; goto free_all_kdata; } @@ -389,7 +394,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, { struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned num_deps; + unsigned int num_deps; int i, r; num_deps = chunk->length_dw * 4 / @@ -452,18 +457,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, } r = amdgpu_sync_fence(&p->sync, fence); - if (r) - goto error; - - /* - * When we have an explicit dependency it might be necessary to insert a - * pipeline sync to make sure that all caches etc are flushed and the - * next job actually sees the results from the previous one. - */ - if (fence->context == p->gang_leader->base.entity->fence_context) - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); - -error: dma_fence_put(fence); return r; } @@ -472,7 +465,7 @@ static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i, r; num_deps = chunk->length_dw * 4 / @@ -490,7 +483,7 @@ static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i, r; num_deps = chunk->length_dw * 4 / @@ -510,7 +503,7 @@ static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i; num_deps = chunk->length_dw * 4 / @@ -544,7 +537,7 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i; num_deps = chunk->length_dw * 4 / @@ -583,6 +576,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, return 0; } +static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata; + int i; + + if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW) + return -EINVAL; + + for (i = 0; i < p->gang_size; ++i) { + p->jobs[i]->shadow_va = shadow->shadow_va; + p->jobs[i]->csa_va = shadow->csa_va; + p->jobs[i]->gds_va = shadow->gds_va; + p->jobs[i]->init_shadow = + shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; + } + + return 0; +} + static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) { unsigned int ce_preempt = 0, de_preempt = 0; @@ -625,6 +638,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) if (r) return r; break; + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: + r = amdgpu_cs_p2_shadow(p, chunk); + if (r) + return r; + break; } } @@ -737,6 +755,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, if (used_vis_vram < total_vis_vram) { u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + increment_us, us_upper_bound); @@ -890,15 +909,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, mutex_lock(&p->bo_list->bo_list_mutex); - /* One for TTM and one for the CS job */ + /* One for TTM and one for each CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->tv.num_shared = 2; + e->tv.num_shared = 1 + p->gang_size; + p->uf_entry.tv.num_shared = 1 + p->gang_size; amdgpu_bo_list_get_list(p->bo_list, &p->validated); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); + /* Two for VM updates, one for TTM and one for each CS job */ + p->vm_pd.tv.num_shared = 3 + p->gang_size; + if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); @@ -1055,9 +1078,8 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, /* the IB should be reserved at this point */ r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { + if (r) return r; - } kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); @@ -1188,10 +1210,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct drm_gpu_scheduler *sched; struct amdgpu_bo_list_entry *e; + struct dma_fence *fence; unsigned int i; int r; + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); + if (r) { + if (r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + return r; + } + list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); struct dma_resv *resv = bo->tbo.base.resv; @@ -1211,10 +1242,27 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); - if (r && r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; + sched = p->gang_leader->base.entity->rq->sched; + while ((fence = amdgpu_sync_get_fence(&p->sync))) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + + /* + * When we have an dependency it might be necessary to insert a + * pipeline sync to make sure that all caches etc are flushed and the + * next job actually sees the results from the previous one + * before we start executing on the same scheduler ring. + */ + if (!s_fence || s_fence->sched != sched) { + dma_fence_put(fence); + continue; + } + + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + dma_fence_put(fence); + if (r) + return r; + } + return 0; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1254,9 +1302,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, continue; fence = &p->jobs[i]->base.s_fence->scheduled; + dma_fence_get(fence); r = drm_sched_job_add_dependency(&leader->base, fence); - if (r) - goto error_cleanup; + if (r) { + dma_fence_put(fence); + return r; + } } if (p->gang_size > 1) { @@ -1282,7 +1333,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } if (r) { r = -EAGAIN; - goto error_unlock; + mutex_unlock(&p->adev->notifier_lock); + return r; } p->fence = dma_fence_get(&leader->base.s_fence->finished); @@ -1329,21 +1381,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, mutex_unlock(&p->adev->notifier_lock); mutex_unlock(&p->bo_list->bo_list_mutex); return 0; - -error_unlock: - mutex_unlock(&p->adev->notifier_lock); - -error_cleanup: - for (i = 0; i < p->gang_size; ++i) - drm_sched_job_cleanup(&p->jobs[i]->base); - return r; } /* Cleanup the parser structure */ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { - unsigned i; + unsigned int i; + amdgpu_sync_free(&parser->sync); for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); kfree(parser->post_deps[i].chain); @@ -1609,15 +1654,15 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, continue; r = dma_fence_wait_timeout(fence, true, timeout); + if (r > 0 && fence->error) + r = fence->error; + dma_fence_put(fence); if (r < 0) return r; if (r == 0) break; - - if (fence->error) - return fence->error; } memset(wait, 0, sizeof(*wait)); |
