Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 526
1 file changed, 357 insertions, 169 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6614d8a6f4c8..468003583b2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
 #include "amdgpu_ras.h"
+#include <linux/nospec.h>
 
 #define to_amdgpu_ctx_entity(e)	\
 	container_of((e), struct amdgpu_ctx_entity, entity)
@@ -42,21 +43,61 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
 {
-	unsigned i, num_entities = 0;
+	switch (ctx_prio) {
+	case AMDGPU_CTX_PRIORITY_UNSET:
+	case AMDGPU_CTX_PRIORITY_VERY_LOW:
+	case AMDGPU_CTX_PRIORITY_LOW:
+	case AMDGPU_CTX_PRIORITY_NORMAL:
+	case AMDGPU_CTX_PRIORITY_HIGH:
+	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static enum drm_sched_priority
+amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
+{
+	switch (ctx_prio) {
+	case AMDGPU_CTX_PRIORITY_UNSET:
+		return DRM_SCHED_PRIORITY_UNSET;
+
+	case AMDGPU_CTX_PRIORITY_VERY_LOW:
+		return DRM_SCHED_PRIORITY_MIN;
+
+	case AMDGPU_CTX_PRIORITY_LOW:
+		return DRM_SCHED_PRIORITY_MIN;
+
+	case AMDGPU_CTX_PRIORITY_NORMAL:
+		return DRM_SCHED_PRIORITY_NORMAL;
+
+	case AMDGPU_CTX_PRIORITY_HIGH:
+		return DRM_SCHED_PRIORITY_HIGH;
+
+	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+		return DRM_SCHED_PRIORITY_HIGH;
 
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
+	/* This should not happen as we sanitized userspace provided priority
+	 * already, WARN if this happens.
+	 */
+	default:
+		WARN(1, "Invalid context priority %d\n", ctx_prio);
+		return DRM_SCHED_PRIORITY_NORMAL;
+	}
 
-	return num_entities;
 }
 
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
-				      enum drm_sched_priority priority)
+				      int32_t priority)
 {
+	if (!amdgpu_ctx_priority_is_valid(priority))
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
-	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
+	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
 		return 0;
 
 	if (capable(CAP_SYS_NICE))
@@ -68,47 +109,124 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
+{
+	switch (prio) {
+	case AMDGPU_CTX_PRIORITY_HIGH:
+	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+		return AMDGPU_GFX_PIPE_PRIO_HIGH;
+	default:
+		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+	}
+}
+
+static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
+{
+	switch (prio) {
+	case AMDGPU_CTX_PRIORITY_HIGH:
+		return AMDGPU_RING_PRIO_1;
+	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+		return AMDGPU_RING_PRIO_2;
+	default:
+		return AMDGPU_RING_PRIO_0;
+	}
+}
+
+static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	int32_t ctx_prio;
+	unsigned int hw_prio;
+
+	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+			ctx->init_priority : ctx->override_priority;
+
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_COMPUTE:
+		hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
+		break;
+	case AMDGPU_HW_IP_VCE:
+	case AMDGPU_HW_IP_VCN_ENC:
+		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
+		break;
+	default:
+		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+		break;
+	}
+
+	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+	return hw_prio;
+}
+
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+				  const u32 ring)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	int32_t ctx_prio;
+	unsigned int hw_prio;
+	enum drm_sched_priority drm_prio;
+	int r;
+
+	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
+			 GFP_KERNEL);
+	if (!entity)
+		return -ENOMEM;
+
+	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+			ctx->init_priority : ctx->override_priority;
+	entity->sequence = 1;
+	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
+
+	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+	/* disable load balance if the hw engine retains context among dependent jobs */
+	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+	    hw_ip == AMDGPU_HW_IP_UVD) {
+		sched = drm_sched_pick_best(scheds, num_scheds);
+		scheds = &sched;
+		num_scheds = 1;
+	}
+
+	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
+
+	ctx->entities[hw_ip][ring] = entity;
+	return 0;
+
+error_free_entity:
+	kfree(entity);
+
+	return r;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
+			   int32_t priority,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j, k;
 	int r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
-
 	r = amdgpu_ctx_priority_permit(filp, priority);
 	if (r)
 		return r;
 
 	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
-	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
-			      sizeof(struct dma_fence*), GFP_KERNEL);
-	if (!ctx->fences)
-		return -ENOMEM;
-
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0]) {
-		r = -ENOMEM;
-		goto error_free_fences;
-	}
-
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		entity->sequence = 1;
-		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-			amdgpu_ctx_num_entities[i - 1];
+
+	ctx->adev = adev;
 
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
@@ -118,116 +236,50 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->reset_counter_query = ctx->reset_counter;
 	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
 
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
-		unsigned num_rings = 0;
-		unsigned num_rqs = 0;
-
-		switch (i) {
-		case AMDGPU_HW_IP_GFX:
-			rings[0] = &adev->gfx.gfx_ring[0];
-			num_rings = 1;
-			break;
-		case AMDGPU_HW_IP_COMPUTE:
-			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-				rings[j] = &adev->gfx.compute_ring[j];
-			num_rings = adev->gfx.num_compute_rings;
-			break;
-		case AMDGPU_HW_IP_DMA:
-			for (j = 0; j < adev->sdma.num_instances; ++j)
-				rings[j] = &adev->sdma.instance[j].ring;
-			num_rings = adev->sdma.num_instances;
-			break;
-		case AMDGPU_HW_IP_UVD:
-			rings[0] = &adev->uvd.inst[0].ring;
-			num_rings = 1;
-			break;
-		case AMDGPU_HW_IP_VCE:
-			rings[0] = &adev->vce.ring[0];
-			num_rings = 1;
-			break;
-		case AMDGPU_HW_IP_UVD_ENC:
-			rings[0] = &adev->uvd.inst[0].ring_enc[0];
-			num_rings = 1;
-			break;
-		case AMDGPU_HW_IP_VCN_DEC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-			}
-			break;
-		case AMDGPU_HW_IP_VCN_ENC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
-			}
-			break;
-		case AMDGPU_HW_IP_VCN_JPEG:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
-			}
-			break;
-		}
-
-		for (j = 0; j < num_rings; ++j) {
-			if (!rings[j]->adev)
-				continue;
+	return 0;
+}
 
-			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
-		}
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rqs, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
-	}
+	int i;
 
-	return 0;
+	if (!entity)
+		return;
 
-error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-	kfree(ctx->entities[0]);
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
 
-error_free_fences:
-	kfree(ctx->fences);
-	ctx->fences = NULL;
-	return r;
+	kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i)
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->entities[0][i].fences[j]);
-	kfree(ctx->fences);
-	kfree(ctx->entities[0]);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
+	}
 
 	mutex_destroy(&ctx->lock);
-
 	kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -244,14 +296,20 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	*entity = &ctx->entities[hw_ip][ring].entity;
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
 	return 0;
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    struct drm_file *filp,
-			    enum drm_sched_priority priority,
+			    int32_t priority,
 			    uint32_t *id)
 {
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@@ -284,14 +342,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
 
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -345,13 +406,15 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
 	return 0;
 }
 
+#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
+
 static int amdgpu_ctx_query2(struct amdgpu_device *adev,
-	struct amdgpu_fpriv *fpriv, uint32_t id,
-	union drm_amdgpu_ctx_out *out)
+			     struct amdgpu_fpriv *fpriv, uint32_t id,
+			     union drm_amdgpu_ctx_out *out)
 {
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct amdgpu_ctx *ctx;
 	struct amdgpu_ctx_mgr *mgr;
-	unsigned long ras_counter;
 
 	if (!fpriv)
 		return -EINVAL;
@@ -376,19 +439,28 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 	if (atomic_read(&ctx->guilty))
 		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
-	/*query ue count*/
-	ras_counter = amdgpu_ras_query_error_count(adev, false);
-	/*ras counter is monotonic increasing*/
-	if (ras_counter != ctx->ras_counter_ue) {
-		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
-		ctx->ras_counter_ue = ras_counter;
-	}
+	if (adev->ras_enabled && con) {
+		/* Return the cached values in O(1),
+		 * and schedule delayed work to cache
+		 * new vaues.
+		 */
+		int ce_count, ue_count;
+
+		ce_count = atomic_read(&con->ras_ce_count);
+		ue_count = atomic_read(&con->ras_ue_count);
+
+		if (ce_count != ctx->ras_counter_ce) {
+			ctx->ras_counter_ce = ce_count;
+			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+		}
+
+		if (ue_count != ctx->ras_counter_ue) {
+			ctx->ras_counter_ue = ue_count;
+			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+		}
 
-	/*query ce count*/
-	ras_counter = amdgpu_ras_query_error_count(adev, true);
-	if (ras_counter != ctx->ras_counter_ce) {
-		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
-		ctx->ras_counter_ce = ras_counter;
+		schedule_delayed_work(&con->ras_counte_delay_work,
+				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
 	}
 
 	mutex_unlock(&mgr->lock);
@@ -400,20 +472,19 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 {
 	int r;
 	uint32_t id;
-	enum drm_sched_priority priority;
+	int32_t priority;
 	union drm_amdgpu_ctx *args = data;
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 
-	r = 0;
 	id = args->in.ctx_id;
-	priority = amdgpu_to_sched_priority(args->in.priority);
+	priority = args->in.priority;
 
 	/* For backwards compatibility reasons, we need to accept
 	 * ioctls with garbage in the priority field */
-	if (priority == DRM_SCHED_PRIORITY_INVALID)
-		priority = DRM_SCHED_PRIORITY_NORMAL;
+	if (!amdgpu_ctx_priority_is_valid(priority))
+		priority = AMDGPU_CTX_PRIORITY_NORMAL;
 
 	switch (args->in.op) {
 	case AMDGPU_CTX_OP_ALLOC_CTX:
@@ -465,7 +536,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 			  struct drm_sched_entity *entity,
-			  struct dma_fence *fence, uint64_t* handle)
+			  struct dma_fence *fence, uint64_t *handle)
 {
 	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	uint64_t seq = centity->sequence;
@@ -518,22 +589,49 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 	return fence;
 }
 
+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+					   struct amdgpu_ctx_entity *aentity,
+					   int hw_ip,
+					   int32_t priority)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned int hw_prio;
+	struct drm_gpu_scheduler **scheds = NULL;
+	unsigned num_scheds;
+
+	/* set sw priority */
+	drm_sched_entity_set_priority(&aentity->entity,
+				      amdgpu_ctx_to_drm_sched_prio(priority));
+
+	/* set hw priority */
+	if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+		drm_sched_entity_modify_sched(&aentity->entity, scheds,
+					      num_scheds);
+	}
+}
+
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-				  enum drm_sched_priority priority)
+				  int32_t priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	int32_t ctx_prio;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
-	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
-
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+						       i, ctx_prio);
+		}
 	}
 }
 
@@ -569,20 +667,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -591,10 +693,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -604,8 +705,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
 
@@ -627,3 +737,81 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 	idr_destroy(&mgr->ctx_handles);
 	mutex_destroy(&mgr->lock);
 }
+
+static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
+		struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
+{
+	ktime_t now, t1;
+	uint32_t i;
+
+	*total = *max = 0;
+
+	now = ktime_get();
+	for (i = 0; i < amdgpu_sched_jobs; i++) {
+		struct dma_fence *fence;
+		struct drm_sched_fence *s_fence;
+
+		spin_lock(&ctx->ring_lock);
+		fence = dma_fence_get(centity->fences[i]);
+		spin_unlock(&ctx->ring_lock);
+		if (!fence)
+			continue;
+		s_fence = to_drm_sched_fence(fence);
+		if (!dma_fence_is_signaled(&s_fence->scheduled)) {
+			dma_fence_put(fence);
+			continue;
+		}
+		t1 = s_fence->scheduled.timestamp;
+		if (!ktime_before(t1, now)) {
+			dma_fence_put(fence);
+			continue;
+		}
+		if (dma_fence_is_signaled(&s_fence->finished) &&
+			s_fence->finished.timestamp < now)
+			*total += ktime_sub(s_fence->finished.timestamp, t1);
+		else
+			*total += ktime_sub(now, t1);
+		t1 = ktime_sub(now, t1);
+		dma_fence_put(fence);
+		*max = max(t1, *max);
+	}
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+		uint32_t idx, uint64_t *elapsed)
+{
+	struct idr *idp;
+	struct amdgpu_ctx *ctx;
+	uint32_t id;
+	struct amdgpu_ctx_entity *centity;
+	ktime_t total = 0, max = 0;
+
+	if (idx >= AMDGPU_MAX_ENTITY_NUM)
+		return 0;
+	idp = &mgr->ctx_handles;
+	mutex_lock(&mgr->lock);
+	idr_for_each_entry(idp, ctx, id) {
+		ktime_t ttotal, tmax;
+
+		if (!ctx->entities[hwip][idx])
+			continue;
+
+		centity = ctx->entities[hwip][idx];
+		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
+
+		/* Harmonic mean approximation diverges for very small
+		 * values. If ratio < 0.01% ignore
+		 */
+		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
+			continue;
+
+		total = ktime_add(total, ttotal);
+		max = ktime_after(tmax, max) ? tmax : max;
+	}
+
+	mutex_unlock(&mgr->lock);
+	if (elapsed)
+		*elapsed = max;
+
+	return total;
+}
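Editor's note (not part of the commit): with this change the context ioctl takes the raw AMDGPU_CTX_PRIORITY_* value from userspace, silently falls back to AMDGPU_CTX_PRIORITY_NORMAL for unrecognized values, and only enforces CAP_SYS_NICE (or DRM master privileges) for priorities above NORMAL when the context is actually created. The following is a minimal userspace sketch of allocating a high-priority context; the render-node path, the use of the libdrm helper drmCommandWriteRead, and building against libdrm's include path (pkg-config libdrm) are illustrative assumptions rather than anything specified by the patch.

/* Hypothetical userspace example: allocate an amdgpu context with
 * AMDGPU_CTX_PRIORITY_HIGH. Assumes libdrm headers and an amdgpu render
 * node at /dev/dri/renderD128; both are assumptions for illustration.
 * Build roughly as: cc ctx_prio.c $(pkg-config --cflags --libs libdrm)
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

int main(void)
{
	union drm_amdgpu_ctx args;
	int fd, ret;

	fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	/* Values above NORMAL need CAP_SYS_NICE or DRM master (-EACCES
	 * otherwise); garbage values are treated as NORMAL by the ioctl. */
	args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;

	ret = drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (!ret)
		printf("allocated ctx id %u\n", args.out.alloc.ctx_id);

	close(fd);
	return ret ? 1 : 0;
}

The same union drm_amdgpu_ctx is used by AMDGPU_CTX_OP_QUERY_STATE2, where the patch now reports the cached RAS CE/UE counts and schedules the delayed worker instead of querying the error counters synchronously.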