summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_request.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--drivers/gpu/drm/i915/i915_request.c234
1 files changed, 141 insertions, 93 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e5a55801f753..c0df71d7d0ff 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -290,7 +290,7 @@ bool i915_request_retire(struct i915_request *rq)
spin_unlock_irq(&rq->lock);
remove_from_client(rq);
- list_del_rcu(&rq->link);
+ __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
intel_context_exit(rq->context);
intel_context_unpin(rq->context);
@@ -363,6 +363,50 @@ __await_execution(struct i915_request *rq,
return 0;
}
+static bool fatal_error(int error)
+{
+ switch (error) {
+ case 0: /* not an error! */
+ case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
+ case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
+ return false;
+ default:
+ return true;
+ }
+}
+
+void __i915_request_skip(struct i915_request *rq)
+{
+ GEM_BUG_ON(!fatal_error(rq->fence.error));
+
+ if (rq->infix == rq->postfix)
+ return;
+
+ /*
+ * As this request likely depends on state from the lost
+ * context, clear out all the user operations leaving the
+ * breadcrumb at the end (so we get the fence notifications).
+ */
+ __i915_request_fill(rq, 0);
+ rq->infix = rq->postfix;
+}
+
+void i915_request_set_error_once(struct i915_request *rq, int error)
+{
+ int old;
+
+ GEM_BUG_ON(!IS_ERR_VALUE((long)error));
+
+ if (i915_request_signaled(rq))
+ return;
+
+ old = READ_ONCE(rq->fence.error);
+ do {
+ if (fatal_error(old))
+ return;
+ } while (!try_cmpxchg(&rq->fence.error, &old, error));
+}
+
bool __i915_request_submit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
@@ -392,8 +436,10 @@ bool __i915_request_submit(struct i915_request *request)
if (i915_request_completed(request))
goto xfer;
- if (intel_context_is_banned(request->context))
- i915_request_skip(request, -EIO);
+ if (unlikely(intel_context_is_banned(request->context)))
+ i915_request_set_error_once(request, -EIO);
+ if (unlikely(fatal_error(request->fence.error)))
+ __i915_request_skip(request);
/*
* Are we using semaphores when the gpu is already saturated?
@@ -519,7 +565,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
trace_i915_request_submit(request);
if (unlikely(fence->error))
- i915_request_skip(request, fence->error);
+ i915_request_set_error_once(request, fence->error);
/*
* We need to serialize use of the submit_request() callback
@@ -542,19 +588,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
+static void irq_semaphore_cb(struct irq_work *wrk)
+{
+ struct i915_request *rq =
+ container_of(wrk, typeof(*rq), semaphore_work);
+
+ i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
+ i915_request_put(rq);
+}
+
static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
- struct i915_request *request =
- container_of(fence, typeof(*request), semaphore);
+ struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
switch (state) {
case FENCE_COMPLETE:
- i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
+ if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
+ i915_request_get(rq);
+ init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
+ irq_work_queue(&rq->semaphore_work);
+ }
break;
case FENCE_FREE:
- i915_request_put(request);
+ i915_request_put(rq);
break;
}
@@ -691,6 +749,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
RCU_INIT_POINTER(rq->timeline, tl);
RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
rq->hwsp_seqno = tl->hwsp_seqno;
+ GEM_BUG_ON(i915_request_completed(rq));
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -791,8 +850,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
struct dma_fence *fence;
int err;
- GEM_BUG_ON(i915_request_timeline(rq) ==
- rcu_access_pointer(signal->timeline));
+ if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
+ return 0;
if (i915_request_started(signal))
return 0;
@@ -836,7 +895,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
return 0;
err = 0;
- if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
+ if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
err = i915_sw_fence_await_dma_fence(&rq->submit,
fence, 0,
I915_FENCE_GFP);
@@ -860,7 +919,7 @@ already_busywaiting(struct i915_request *rq)
*
* See the are-we-too-late? check in __i915_request_submit().
*/
- return rq->sched.semaphores | rq->engine->saturated;
+ return rq->sched.semaphores | READ_ONCE(rq->engine->saturated);
}
static int
@@ -918,6 +977,8 @@ emit_semaphore_wait(struct i915_request *to,
struct i915_request *from,
gfp_t gfp)
{
+ const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
+
if (!intel_context_use_semaphores(to->context))
goto await_fence;
@@ -925,7 +986,7 @@ emit_semaphore_wait(struct i915_request *to,
goto await_fence;
/* Just emit the first semaphore we see as request space is limited. */
- if (already_busywaiting(to) & from->engine->mask)
+ if (already_busywaiting(to) & mask)
goto await_fence;
if (i915_request_await_start(to, from) < 0)
@@ -938,7 +999,7 @@ emit_semaphore_wait(struct i915_request *to,
if (__emit_semaphore_wait(to, from, from->fence.seqno))
goto await_fence;
- to->sched.semaphores |= from->engine->mask;
+ to->sched.semaphores |= mask;
to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
return 0;
@@ -1081,14 +1142,45 @@ __i915_request_await_execution(struct i915_request *to,
&from->fence))
return 0;
- /* Ensure both start together [after all semaphores in signal] */
- if (intel_engine_has_semaphores(to->engine))
- err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
- else
- err = i915_request_await_start(to, from);
+ /*
+ * Wait until the start of this request.
+ *
+ * The execution cb fires when we submit the request to HW. But in
+ * many cases this may be long before the request itself is ready to
+ * run (consider that we submit 2 requests for the same context, where
+ * the request of interest is behind an indefinite spinner). So we hook
+ * up to both to reduce our queues and keep the execution lag minimised
+ * in the worst case, though we hope that the await_start is elided.
+ */
+ err = i915_request_await_start(to, from);
if (err < 0)
return err;
+ /*
+ * Ensure both start together [after all semaphores in signal]
+ *
+ * Now that we are queued to the HW at roughly the same time (thanks
+ * to the execute cb) and are ready to run at roughly the same time
+ * (thanks to the await start), our signaler may still be indefinitely
+ * delayed by waiting on a semaphore from a remote engine. If our
+ * signaler depends on a semaphore, so indirectly do we, and we do not
+ * want to start our payload until our signaler also starts theirs.
+ * So we wait.
+ *
+ * However, there is also a second condition for which we need to wait
+ * for the precise start of the signaler. Consider that the signaler
+ * was submitted in a chain of requests following another context
+ * (with just an ordinary intra-engine fence dependency between the
+ * two). In this case the signaler is queued to HW, but not for
+ * immediate execution, and so we must wait until it reaches the
+ * active slot.
+ */
+ if (intel_engine_has_semaphores(to->engine)) {
+ err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+ if (err < 0)
+ return err;
+ }
+
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
err = i915_sched_node_add_dependency(&to->sched, &from->sched);
@@ -1209,23 +1301,6 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
-void i915_request_skip(struct i915_request *rq, int error)
-{
- GEM_BUG_ON(!IS_ERR_VALUE((long)error));
- dma_fence_set_error(&rq->fence, error);
-
- if (rq->infix == rq->postfix)
- return;
-
- /*
- * As this request likely depends on state from the lost
- * context, clear out all the user operations leaving the
- * breadcrumb at the end (so we get the fence notifications).
- */
- __i915_request_fill(rq, 0);
- rq->infix = rq->postfix;
-}
-
static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
@@ -1255,7 +1330,17 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
if (prev && !i915_request_completed(prev)) {
- if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+ /*
+ * The requests are supposed to be kept in order. However,
+ * we need to be wary in case the timeline->last_request
+ * is used as a barrier for external modification to this
+ * context.
+ */
+ GEM_BUG_ON(prev->context == rq->context &&
+ i915_seqno_passed(prev->fence.seqno,
+ rq->fence.seqno));
+
+ if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))
i915_sw_fence_await_sw_fence(&rq->submit,
&prev->submit,
&rq->submitq);
@@ -1329,9 +1414,9 @@ void __i915_request_queue(struct i915_request *rq,
* decide whether to preempt the entire chain so that it is ready to
* run at the earliest possible convenience.
*/
- i915_sw_fence_commit(&rq->semaphore);
if (attr && rq->engine->schedule)
rq->engine->schedule(rq, attr);
+ i915_sw_fence_commit(&rq->semaphore);
i915_sw_fence_commit(&rq->submit);
}
@@ -1339,39 +1424,23 @@ void i915_request_add(struct i915_request *rq)
{
struct intel_timeline * const tl = i915_request_timeline(rq);
struct i915_sched_attr attr = {};
- struct i915_request *prev;
+ struct i915_gem_context *ctx;
lockdep_assert_held(&tl->mutex);
lockdep_unpin_lock(&tl->mutex, rq->cookie);
trace_i915_request_add(rq);
+ __i915_request_commit(rq);
- prev = __i915_request_commit(rq);
-
- if (rcu_access_pointer(rq->context->gem_context))
- attr = i915_request_gem_context(rq)->sched;
+ /* XXX placeholder for selftests */
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx)
+ attr = ctx->sched;
+ rcu_read_unlock();
- /*
- * Boost actual workloads past semaphores!
- *
- * With semaphores we spin on one engine waiting for another,
- * simply to reduce the latency of starting our work when
- * the signaler completes. However, if there is any other
- * work that we could be doing on this engine instead, that
- * is better utilisation and will reduce the overall duration
- * of the current work. To avoid PI boosting a semaphore
- * far in the distance past over useful work, we keep a history
- * of any semaphore use along our dependency chain.
- */
if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
- /*
- * Boost priorities to new clients (new request flows).
- *
- * Allow interactive/synchronous clients to jump ahead of
- * the bulk clients. (FQ_CODEL)
- */
if (list_empty(&rq->sched.signalers_list))
attr.priority |= I915_PRIORITY_WAIT;
@@ -1379,32 +1448,10 @@ void i915_request_add(struct i915_request *rq)
__i915_request_queue(rq, &attr);
local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
- /*
- * In typical scenarios, we do not expect the previous request on
- * the timeline to be still tracked by timeline->last_request if it
- * has been completed. If the completed request is still here, that
- * implies that request retirement is a long way behind submission,
- * suggesting that we haven't been retiring frequently enough from
- * the combination of retire-before-alloc, waiters and the background
- * retirement worker. So if the last request on this timeline was
- * already completed, do a catch up pass, flushing the retirement queue
- * up to this client. Since we have now moved the heaviest operations
- * during retirement onto secondary workers, such as freeing objects
- * or contexts, retiring a bunch of requests is mostly list management
- * (and cache misses), and so we should not be overly penalizing this
- * client by performing excess work, though we may still performing
- * work on behalf of others -- but instead we should benefit from
- * improved resource management. (Well, that's the theory at least.)
- */
- if (prev &&
- i915_request_completed(prev) &&
- rcu_access_pointer(prev->timeline) == tl)
- i915_request_retire_upto(prev);
-
mutex_unlock(&tl->mutex);
}
-static unsigned long local_clock_us(unsigned int *cpu)
+static unsigned long local_clock_ns(unsigned int *cpu)
{
unsigned long t;
@@ -1421,7 +1468,7 @@ static unsigned long local_clock_us(unsigned int *cpu)
* stop busywaiting, see busywait_stop().
*/
*cpu = get_cpu();
- t = local_clock() >> 10;
+ t = local_clock();
put_cpu();
return t;
@@ -1431,15 +1478,15 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
unsigned int this_cpu;
- if (time_after(local_clock_us(&this_cpu), timeout))
+ if (time_after(local_clock_ns(&this_cpu), timeout))
return true;
return this_cpu != cpu;
}
-static bool __i915_spin_request(const struct i915_request * const rq,
- int state, unsigned long timeout_us)
+static bool __i915_spin_request(const struct i915_request * const rq, int state)
{
+ unsigned long timeout_ns;
unsigned int cpu;
/*
@@ -1467,7 +1514,8 @@ static bool __i915_spin_request(const struct i915_request * const rq,
* takes to sleep on a request, on the order of a microsecond.
*/
- timeout_us += local_clock_us(&cpu);
+ timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
+ timeout_ns += local_clock_ns(&cpu);
do {
if (i915_request_completed(rq))
return true;
@@ -1475,7 +1523,7 @@ static bool __i915_spin_request(const struct i915_request * const rq,
if (signal_pending_state(state, current))
break;
- if (busywait_stop(timeout_us, cpu))
+ if (busywait_stop(timeout_ns, cpu))
break;
cpu_relax();
@@ -1561,8 +1609,8 @@ long i915_request_wait(struct i915_request *rq,
* completion. That requires having a good predictor for the request
* duration, which we currently lack.
*/
- if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
- __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
+ if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
+ __i915_spin_request(rq, state)) {
dma_fence_signal(&rq->fence);
goto out;
}