From 163433e5c50afd8276f7dfdead4bf35f1bdafd05 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jan 2021 13:56:08 +0000 Subject: drm/i915: Mark up protected uses of 'i915_request_completed' When we know that we are inside the timeline mutex, or inside the submission flow (under active.lock or the holder's rcu lock), we know that the rq->hwsp is stable and we can use the simpler direct version. Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210114135612.13210-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b1a46a0d866..784c05ac5cca 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -276,7 +276,7 @@ static void remove_from_engine(struct i915_request *rq) bool i915_request_retire(struct i915_request *rq) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) return false; RQ_TRACE(rq, "\n"); @@ -342,8 +342,7 @@ void i915_request_retire_upto(struct i915_request *rq) struct i915_request *tmp; RQ_TRACE(rq, "\n"); - - GEM_BUG_ON(!i915_request_completed(rq)); + GEM_BUG_ON(!__i915_request_is_complete(rq)); do { tmp = list_first_entry(&tl->requests, typeof(*tmp), link); @@ -552,7 +551,7 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) */ - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) goto xfer; if (unlikely(intel_context_is_banned(request->context))) @@ -652,7 +651,7 @@ void __i915_request_unsubmit(struct i915_request *request) i915_request_cancel_breadcrumb(request); /* We've already spun, don't charge on resubmitting. */ - if (request->sched.semaphores && i915_request_started(request)) + if (request->sched.semaphores && __i915_request_has_started(request)) request->sched.semaphores = 0; /* @@ -864,7 +863,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -978,7 +977,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) struct i915_request *prev; /* Confirm signal has not been retired, the link is valid */ - if (unlikely(i915_request_started(signal))) + if (unlikely(__i915_request_has_started(signal))) break; /* Is signal the earliest request on its timeline? */ @@ -1520,7 +1519,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) */ prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); - if (prev && !i915_request_completed(prev)) { + if (prev && !__i915_request_is_complete(prev)) { /* * The requests are supposed to be kept in order. However, * we need to be wary in case the timeline->last_request @@ -1897,10 +1896,10 @@ static char queue_status(const struct i915_request *rq) static const char *run_status(const struct i915_request *rq) { - if (i915_request_completed(rq)) + if (__i915_request_is_complete(rq)) return "!"; - if (i915_request_started(rq)) + if (__i915_request_has_started(rq)) return "*"; if (!i915_sw_fence_signaled(&rq->semaphore)) -- cgit v1.2.3-70-g09d2 From b2fe00bbb2b6bf06929dc300a4e3e91d9c12a3c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jan 2021 13:56:09 +0000 Subject: drm/i915: Drop i915_request.lock serialisation around await_start Originally, we used the signal->lock as a means of following the previous link in its timeline and peeking at the previous fence. However, we have replaced the explicit serialisation with a series of very careful probes that anticipate the links being deleted and the fences recycled before we are able to acquire a strong reference to it. We do not need the signal->lock crutch anymore, nor want the contention. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210114135612.13210-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 784c05ac5cca..973eceabbcca 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -969,9 +969,16 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) if (i915_request_started(signal)) return 0; + /* + * The caller holds a reference on @signal, but we do not serialise + * against it being retired and removed from the lists. + * + * We do not hold a reference to the request before @signal, and + * so must be very careful to ensure that it is not _recycled_ as + * we follow the link backwards. + */ fence = NULL; rcu_read_lock(); - spin_lock_irq(&signal->lock); do { struct list_head *pos = READ_ONCE(signal->link.prev); struct i915_request *prev; @@ -1002,7 +1009,6 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) fence = &prev->fence; } while (0); - spin_unlock_irq(&signal->lock); rcu_read_unlock(); if (!fence) return 0; -- cgit v1.2.3-70-g09d2 From 9736387a9f17a521a8aff3bc64814f335a6f09e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jan 2021 13:56:12 +0000 Subject: drm/i915: Reduce test_and_set_bit to set_bit in i915_request_submit() Avoid the full blown memory barrier of test_and_set_bit() by noting the completed request and removing it from the lists. Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210114135612.13210-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 973eceabbcca..22e39d938f17 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -551,8 +551,10 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) */ - if (__i915_request_is_complete(request)) - goto xfer; + if (__i915_request_is_complete(request)) { + list_del_init(&request->sched.link); + goto active; + } if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); @@ -587,11 +589,11 @@ bool __i915_request_submit(struct i915_request *request) engine->serial++; result = true; -xfer: - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { - list_move_tail(&request->sched.link, &engine->active.requests); - clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); - } + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + list_move_tail(&request->sched.link, &engine->active.requests); +active: + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); /* * XXX Rollback bonded-execution on __i915_request_unsubmit()? -- cgit v1.2.3-70-g09d2