From 116788689bf8ca3c3337d8dd1a742c08342d2f62 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 15 May 2020 19:35:45 -0700 Subject: drm/i915: Mark check_shadow_context_ppgtt as maybe unused When CONFIG_DRM_I915_DEBUG_GEM is not set, clang warns: drivers/gpu/drm/i915/gvt/scheduler.c:884:1: warning: function 'check_shadow_context_ppgtt' is not needed and will not be emitted [-Wunneeded-internal-declaration] check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) ^ 1 warning generated. This warning is similar to -Wunused-function but rather than warning that the function is completely unused, it warns that it is used in some expression within the file but that expression will be evaluated to a constant or be optimized away in the final assembly, essentially making the function appear used when it really isn't. Usually, this happens when a function or variable is only used in sizeof, where it will appear to be used but will be evaluated at compile time and not be required to be emitted. In this case, the function is only used in GEM_BUG_ON, which is defined as BUILD_BUG_ON_INVALID, which intentionally follows this pattern. To fix this warning, add __maybe_unused to make it clear that this is intentional depending on the configuration. Fixes: bec3df930fbd ("drm/i915/gvt: Support PPGTT table load command") Link: https://github.com/ClangBuiltLinux/linux/issues/1027 Acked-by: Zhenyu Wang Signed-off-by: Nathan Chancellor Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200516023545.3332334-1-natechancellor@gmail.com --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c00189432b58..3a9bd8e4d8db 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -876,7 +876,7 @@ static void update_guest_pdps(struct intel_vgpu *vgpu, gpa + i * 8, &pdp[7 - i], 4); } -static bool +static __maybe_unused bool check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) { if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { -- cgit v1.2.3-70-g09d2 From cb7ee52284a244fd14caec73df0d49e02891aac4 Mon Sep 17 00:00:00 2001 From: Aishwarya Ramakrishnan Date: Mon, 18 May 2020 20:33:36 +0530 Subject: drm/i915/gvt: Use ARRAY_SIZE for vgpu_types Prefer ARRAY_SIZE instead of open-coding the division with sizeof. Fixes coccicheck warning: Use ARRAY_SIZE Reviewed-by: Chris Wilson Signed-off-by: Aishwarya Ramakrishnan Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200518150336.15265-1-aishwaryarj100@gmail.com --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 1d5ff88078bd..7d361623ff67 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -124,7 +124,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) */ low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE; high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE; - num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]); + num_types = ARRAY_SIZE(vgpu_types); gvt->types = kcalloc(num_types, sizeof(struct intel_vgpu_type), GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 0fad590fd98f308c903fe8205c0f07a649a0b72e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:12 +0100 Subject: drm/i915:
Don't set queue-priority hint when suppressing the reschedule We recorded the execlists->queue_priority_hint update for the inflight request without kicking the tasklet. The next submitted request then failed to be scheduled as it had a lower priority than the hint, leaving the HW running with only the inflight request. Fixes: 6cebcf746f3f ("drm/i915: Tweak scheduler's kick_submission()") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-1-chris@chris-wilson.co.uk (cherry picked from commit b86fc6e5e89e5645b43f57171c26740ef38f9f4a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_scheduler.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f4ea318781f0..cbb880b10c65 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -209,14 +209,6 @@ static void kick_submission(struct intel_engine_cs *engine, if (!inflight) goto unlock; - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); - engine->execlists.queue_priority_hint = prio; - /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ @@ -224,6 +216,14 @@ static void kick_submission(struct intel_engine_cs *engine, if (inflight->context == rq->context) goto unlock; + ENGINE_TRACE(engine, + "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", + prio, + rq->fence.context, rq->fence.seqno, + inflight->fence.context, inflight->fence.seqno, + inflight->sched.attr.priority); + + engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); -- cgit v1.2.3-70-g09d2 From b7ccc7858a33ddb3c5516f39b104a9156957c8bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 May 2020 08:30:48 +0100 Subject: drm/i915/gt: Remove errant assertion in __intel_context_do_pin This assertion was removed in commit b412c63f1cba ("drm/i915/gt: Report context-is-closed prior to pinning"), but was accidentally restored by a cherry-pick into drm-next and has now percolated back to drm-intel-next-queued.
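For context on the GEM_BUG_ON being removed here, and on the -Wunneeded-internal-declaration warning in the first patch of this log: under CONFIG_DRM_I915_DEBUG_GEM the macro expands to a live BUG_ON, otherwise it degrades to a compile-time type check that evaluates nothing and emits no code. A minimal userspace sketch of the two flavours (an assumed simplification of the i915 macros, not their exact definitions):

#include <assert.h>

#ifdef DEBUG_GEM
#define GEM_BUG_ON(expr) assert(!(expr))              /* live runtime assertion */
#else
#define GEM_BUG_ON(expr) ((void)sizeof((long)(expr))) /* type check only, no code */
#endif

/* Only "used" inside sizeof() when DEBUG_GEM is off; without the
 * unused attribute (the kernel's __maybe_unused), clang emits
 * -Wunneeded-internal-declaration exactly as described above. */
static int __attribute__((unused)) context_is_closed(void)
{
        return 0;
}

int main(void)
{
        GEM_BUG_ON(context_is_closed());
        return 0;
}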
Fixes: 2e46a2a0b014 ("drm/i915: Use explicit flag to mark unreachable intel_context") Fixes: 2b703bbda271 ("Merge drm/drm-next into drm-intel-next-queued") References: b412c63f1cba ("drm/i915/gt: Report context-is-closed prior to pinning") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20200520073048.2394034-1-chris@chris-wilson.co.uk (cherry picked from commit f2c1061a3677b400a945d9238f17bf33d669acff) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_context.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 74ddb49b2941..e4aece20bc80 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -97,8 +97,6 @@ int __intel_context_do_pin(struct intel_context *ce) { int err; - GEM_BUG_ON(intel_context_is_closed(ce)); - if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) -- cgit v1.2.3-70-g09d2 From 9ef36fc2d0347f04f75b9d70c0ecc2e3b403bb7f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:06:16 +0100 Subject: drm/i915: Disable semaphore inter-engine sync without timeslicing Since the removal of the no-semaphore boosting, we rely on timeslicing to reorder past inter-dependency hogs across the engines. However, we require preemption to support timeslicing into user payloads, and not all machines support preemption, so we do not universally enable timeslicing, even when it would correctly preempt our own inter-engine semaphores. Since timeslicing and semaphore priority deboosting are now disabled on Broadwell/Braswell, we have to follow suit and not use semaphores.
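To make the new gating concrete, a small userspace sketch of the decision being changed below (illustrative names only, not the i915 API): a context now opts in to semaphores only when the engine can also timeslice, so platforms without preemption fall back to ordinary fence waits.

#include <stdbool.h>
#include <stdio.h>

struct engine { bool has_semaphores; bool has_timeslices; };

/* Before this patch the check was e->has_semaphores; gating on
 * timeslicing instead keeps non-preemptible platforms off semaphores. */
static bool context_can_use_semaphores(const struct engine *e, int prio)
{
        return prio >= 0 && e->has_timeslices;
}

int main(void)
{
        struct engine bdw = { .has_semaphores = true, .has_timeslices = false };
        struct engine icl = { .has_semaphores = true, .has_timeslices = true };

        printf("bdw: %d\n", context_can_use_semaphores(&bdw, 0)); /* 0 */
        printf("icl: %d\n", context_can_use_semaphores(&icl, 0)); /* 1 */
        return 0;
}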
Testcase: igt/gem_exec_schedule/semaphore-codependency # bdw/bsw Fixes: 18e4af04d218 ("drm/i915: Drop no-semaphore boosting") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521140617.30015-1-chris@chris-wilson.co.uk (cherry picked from commit 0eb670aac27b1d615004c29efec595616e3e091a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 900ea8b7fc8f..f5d59d18cd5b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -230,7 +230,7 @@ static void intel_context_set_gem(struct intel_context *ce, ce->timeline = intel_timeline_get(ctx->timeline); if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) + intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); } @@ -1969,7 +1969,7 @@ static int __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; - if (!intel_engine_has_semaphores(ce->engine)) + if (!intel_engine_has_timeslices(ce->engine)) return 0; if (ctx->sched.priority >= I915_PRIORITY_NORMAL) -- cgit v1.2.3-70-g09d2 From ef29440b3ccb93f44cf664311b30fbd5f84d9683 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:06:17 +0100 Subject: drm/i915: Avoid using rq->engine after free during i915_fence_release In order to be valid to dereference during the i915_fence_release, after retiring the fence and releasing its references, we assume that rq->engine can only be a real engine (which stays intact until the device is shut down after all fences have been flushed). However, due to a quirk of preempt-to-busy, we may retire a request that still belongs to a virtual engine and so eventually free it with rq->engine being invalid. To avoid dereferencing that invalid engine, we look at the execution_mask: if it indicates that the request may be executed on more than one engine, we know it originated on a virtual engine and may still be on one. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1906 Fixes: 43acd6516ca9 ("drm/i915: Keep a per-engine request pool") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521140617.30015-2-chris@chris-wilson.co.uk (cherry picked from commit 32a4605b38c30689a6a18f3f4c7d3133ac9d3277) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 526c1e9acbd5..c282719ad3ac 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -121,8 +121,39 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->submit); i915_sw_fence_fini(&rq->semaphore); - /* Keep one request on each engine for reserved use under mempressure */ - if (!cmpxchg(&rq->engine->request_pool, NULL, rq)) + /* + * Keep one request on each engine for reserved use under mempressure + * + * We do not hold a reference to the engine here and so have to be + * very careful in what rq->engine we poke.
The virtual engine is + * referenced via the rq->context and we released that ref during + * i915_request_retire(), ergo we must not dereference a virtual + * engine here. Not that we would want to, as the only consumer of + * the reserved engine->request_pool is the power management parking, + * which must-not-fail, and that is only run on the physical engines. + * + * Since the request must have been executed to be have completed, + * we know that it will have been processed by the HW and will + * not be unsubmitted again, so rq->engine and rq->execution_mask + * at this point is stable. rq->execution_mask will be a single + * bit if the last and _only_ engine it could execution on was a + * physical engine, if it's multiple bits then it started on and + * could still be on a virtual engine. Thus if the mask is not a + * power-of-two we assume that rq->engine may still be a virtual + * engine and so a dangling invalid pointer that we cannot dereference + * + * For example, consider the flow of a bonded request through a virtual + * engine. The request is created with a wide engine mask (all engines + * that we might execute on). On processing the bond, the request mask + * is reduced to one or more engines. If the request is subsequently + * bound to a single engine, it will then be constrained to only + * execute on that engine and never returned to the virtual engine + * after timeslicing away, see __unwind_incomplete_requests(). Thus we + * know that if the rq->execution_mask is a single bit, rq->engine + * can be a physical engine with the exact corresponding mask. + */ + if (is_power_of_2(rq->execution_mask) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) return; kmem_cache_free(global.slab_requests, rq); -- cgit v1.2.3-70-g09d2 From 757a9395f33c51c4e6eff2c7c0fbd50226a58224 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 May 2020 14:27:06 +0100 Subject: drm/i915/gem: Avoid iterating an empty list Our __sgt_iter assumes that the scattergather list has at least one element. But during construction we may fail in allocating the first page, and so mark the first element as the terminator. This is unexpected! 
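In userspace terms, the broken error path walked a list whose first node was already the terminator. A minimal sketch of the guard this patch adds (an assumed simplification, not the i915 sg_table types), followed by the oops it fixes:

#include <stdbool.h>
#include <stdio.h>

struct node { struct node *next; int page; bool is_end; };

/* Mirrors the `sg != st->sgl` test in the diff below: only walk the
 * list if at least one element was populated before the failure. */
static void release_pages(struct node *head, const struct node *failed_at)
{
        if (failed_at == head)
                return; /* allocation failed on the very first element */

        for (struct node *n = head; n && !n->is_end; n = n->next)
                printf("releasing page %d\n", n->page);
}

int main(void)
{
        struct node first = { .next = NULL, .page = 0, .is_end = true };

        release_pages(&first, &first); /* must not dereference the terminator */
        return 0;
}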
[22555.524752] RIP: 0010:shmem_get_pages+0x506/0x710 [i915] [22555.524759] Code: 49 8b 2c 24 31 c0 66 89 44 24 40 48 85 ed 0f 84 62 01 00 00 4c 8b 75 00 8b 5d 08 44 8b 7d 0c 48 8b 0d 7e 34 07 e2 49 83 e6 fc <49> 8b 16 41 01 df 48 89 cf 48 89 d0 48 c1 e8 2d 48 85 c9 0f 84 c8 [22555.524765] RSP: 0018:ffffc9000053f9d0 EFLAGS: 00010246 [22555.524770] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8881ffffa000 [22555.524774] RDX: fffffffffffffff4 RSI: ffffffffffffffff RDI: ffffffff821efe00 [22555.524778] RBP: ffff8881b099ab00 R08: 0000000000000000 R09: 00000000fffffff4 [22555.524782] R10: 0000000000000002 R11: 00000000ffec0a02 R12: ffff8881cd3c8d60 [22555.524786] R13: 00000000fffffff4 R14: 0000000000000000 R15: 0000000000000000 [22555.524790] FS: 00007f4fbeb9b9c0(0000) GS:ffff8881f8580000(0000) knlGS:0000000000000000 [22555.524795] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [22555.524799] CR2: 0000000000000000 CR3: 00000001ec7f0004 CR4: 00000000001606e0 [22555.524803] Call Trace: [22555.524919] __i915_gem_object_get_pages+0x4f/0x60 [i915] Fixes: 85d1225ec066 ("drm/i915: Introduce & use new lightweight SGL iterators") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: # v4.8+ Reviewed-by: Matthew Auld Reviewed-by: Maciej Patelczyk Link: https://patchwork.freedesktop.org/patch/msgid/20200522132706.5133-1-chris@chris-wilson.co.uk (cherry picked from commit 957ad9a02be6faa87594c58ac09460cd3d190d0e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d5d7eef3f43..7aff3514d97a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -39,7 +39,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); unsigned int sg_page_sizes; - struct pagevec pvec; gfp_t noreclaim; int ret; @@ -192,13 +191,17 @@ err_sg: sg_mark_end(sg); err_pages: mapping_clear_unevictable(mapping); - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, st) { - if (!pagevec_add(&pvec, page)) + if (sg != st->sgl) { + struct pagevec pvec; + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) check_release_pagevec(&pvec); } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); sg_free_table(st); kfree(st); -- cgit v1.2.3-70-g09d2 From dd873dd51d8d7868887048545d48befdd6bea7ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 10:07:52 +0100 Subject: drm/i915: Reorder await_execution before await_request Reorder the code so that we can reuse the await_execution from a special case in await_request in the next patch. 
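Among the functions being moved is a small "latest seqno per fence context" cache used to squash repeated waits on the same timeline. A userspace sketch of the idea (fixed-size table and names assumed; the real code hashes fence contexts and tracks seqno - 1 for start-of-request waits):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_CTX 8

static uint32_t latest[MAX_CTX]; /* 0 = nothing awaited yet */

static bool sync_is_later(uint32_t ctx, uint32_t seqno)
{
        return latest[ctx] >= seqno; /* already ordered after this point */
}

static void sync_set(uint32_t ctx, uint32_t seqno)
{
        if (latest[ctx] < seqno)
                latest[ctx] = seqno;
}

int main(void)
{
        sync_set(1, 5);
        printf("%d\n", sync_is_later(1, 3)); /* 1: squash this wait */
        printf("%d\n", sync_is_later(1, 9)); /* 0: must wait */
        return 0;
}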
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk (cherry picked from commit ffb0c600c240103f6f34e07892a7e0a75502b243) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 264 ++++++++++++++++++------------------ 1 file changed, 132 insertions(+), 132 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c282719ad3ac..33bbad623e02 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1053,37 +1053,91 @@ await_fence: I915_FENCE_GFP); } +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} + +static int intel_timeline_sync_set_start(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} + static int -i915_request_await_request(struct i915_request *to, struct i915_request *from) +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) { - int ret; + int err; - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); + GEM_BUG_ON(intel_context_is_barrier(from->context)); - if (i915_request_completed(from)) { - i915_sw_fence_set_error_once(&to->submit, from->fence.error); + /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; + + /* Squash repeated depenendices to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) return 0; + + /* + * Wait until the start of this request. + * + * The execution cb fires when we submit the request to HW. But in + * many cases this may be long before the request itself is ready to + * run (consider that we submit 2 requests for the same context, where + * the request of interest is behind an indefinite spinner). So we hook + * up to both to reduce our queues and keep the execution lag minimised + * in the worst case, though we hope that the await_start is elided. + */ + err = i915_request_await_start(to, from); + if (err < 0) + return err; + + /* + * Ensure both start together [after all semaphores in signal] + * + * Now that we are queued to the HW at roughly the same time (thanks + * to the execute cb) and are ready to run at roughly the same time + * (thanks to the await start), our signaler may still be indefinitely + * delayed by waiting on a semaphore from a remote engine. If our + * signaler depends on a semaphore, so indirectly do we, and we do not + * want to start our payload until our signaler also starts theirs. + * So we wait. + * + * However, there is also a second condition for which we need to wait + * for the precise start of the signaler. Consider that the signaler + * was submitted in a chain of requests following another context + * (with just an ordinary intra-engine fence dependency between the + * two). In this case the signaler is queued to HW, but not for + * immediate execution, and so we must wait until it reaches the + * active slot. 
+ */ + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + if (err < 0) + return err; } + /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, + err = i915_sched_node_add_dependency(&to->sched, &from->sched, - I915_DEPENDENCY_EXTERNAL); - if (ret < 0) - return ret; + I915_DEPENDENCY_WEAK); + if (err < 0) + return err; } - if (to->engine == from->engine) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); - else - ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); - if (ret < 0) - return ret; - - return 0; + return intel_timeline_sync_set_start(i915_request_timeline(to), + &from->fence); } static void mark_external(struct i915_request *rq) @@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) } int -i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) +i915_request_await_execution(struct i915_request *rq, + struct dma_fence *fence, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) { struct dma_fence **child = &fence; unsigned int nchild = 1; int ret; - /* - * Note that if the fence-array was created in signal-on-any mode, - * we should *not* decompose it into its individual fences. However, - * we don't currently store which mode the fence-array is operating - * in. Fortunately, the only user of signal-on-any is private to - * amdgpu and we should not see any incoming fence-array from - * sync-file being in signal-on-any mode. - */ if (dma_fence_is_array(fence)) { struct dma_fence_array *array = to_dma_fence_array(fence); + /* XXX Error for signal-on-any fence arrays */ + child = array->fences; nchild = array->num_fences; GEM_BUG_ON(!nchild); @@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) continue; } - /* - * Requests on the same timeline are explicitly ordered, along - * with their dependencies, by i915_request_add() which ensures - * that requests are submitted in-order through each ring. - */ if (fence->context == rq->fence.context) continue; - /* Squash repeated waits to the same timelines */ - if (fence->context && - intel_timeline_sync_is_later(i915_request_timeline(rq), - fence)) - continue; + /* + * We don't squash repeated fence dependencies here as we + * want to run our callback in all cases. 
+ */ if (dma_fence_is_i915(fence)) - ret = i915_request_await_request(rq, to_request(fence)); + ret = __i915_request_await_execution(rq, + to_request(fence), + hook); else ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; - - /* Record the latest fence used against each timeline */ - if (fence->context) - intel_timeline_sync_set(i915_request_timeline(rq), - fence); } while (--nchild); return 0; } -static bool intel_timeline_sync_has_start(struct intel_timeline *tl, - struct dma_fence *fence) -{ - return __intel_timeline_sync_is_later(tl, - fence->context, - fence->seqno - 1); -} - -static int intel_timeline_sync_set_start(struct intel_timeline *tl, - const struct dma_fence *fence) -{ - return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); -} - static int -__i915_request_await_execution(struct i915_request *to, - struct i915_request *from, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) +i915_request_await_request(struct i915_request *to, struct i915_request *from) { - int err; - - GEM_BUG_ON(intel_context_is_barrier(from->context)); + int ret; - /* Submit both requests at the same time */ - err = __await_execution(to, from, hook, I915_FENCE_GFP); - if (err) - return err; + GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); - /* Squash repeated depenendices to the same timelines */ - if (intel_timeline_sync_has_start(i915_request_timeline(to), - &from->fence)) + if (i915_request_completed(from)) { + i915_sw_fence_set_error_once(&to->submit, from->fence.error); return 0; - - /* - * Wait until the start of this request. - * - * The execution cb fires when we submit the request to HW. But in - * many cases this may be long before the request itself is ready to - * run (consider that we submit 2 requests for the same context, where - * the request of interest is behind an indefinite spinner). So we hook - * up to both to reduce our queues and keep the execution lag minimised - * in the worst case, though we hope that the await_start is elided. - */ - err = i915_request_await_start(to, from); - if (err < 0) - return err; - - /* - * Ensure both start together [after all semaphores in signal] - * - * Now that we are queued to the HW at roughly the same time (thanks - * to the execute cb) and are ready to run at roughly the same time - * (thanks to the await start), our signaler may still be indefinitely - * delayed by waiting on a semaphore from a remote engine. If our - * signaler depends on a semaphore, so indirectly do we, and we do not - * want to start our payload until our signaler also starts theirs. - * So we wait. - * - * However, there is also a second condition for which we need to wait - * for the precise start of the signaler. Consider that the signaler - * was submitted in a chain of requests following another context - * (with just an ordinary intra-engine fence dependency between the - * two). In this case the signaler is queued to HW, but not for - * immediate execution, and so we must wait until it reaches the - * active slot. 
- */ - if (intel_engine_has_semaphores(to->engine) && - !i915_request_has_initial_breadcrumb(to)) { - err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); - if (err < 0) - return err; } - /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - err = i915_sched_node_add_dependency(&to->sched, + ret = i915_sched_node_add_dependency(&to->sched, &from->sched, - I915_DEPENDENCY_WEAK); - if (err < 0) - return err; + I915_DEPENDENCY_EXTERNAL); + if (ret < 0) + return ret; } - return intel_timeline_sync_set_start(i915_request_timeline(to), - &from->fence); + if (to->engine == READ_ONCE(from->engine)) + ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + else + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); + if (ret < 0) + return ret; + + return 0; } int -i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; int ret; + /* + * Note that if the fence-array was created in signal-on-any mode, + * we should *not* decompose it into its individual fences. However, + * we don't currently store which mode the fence-array is operating + * in. Fortunately, the only user of signal-on-any is private to + * amdgpu and we should not see any incoming fence-array from + * sync-file being in signal-on-any mode. + */ if (dma_fence_is_array(fence)) { struct dma_fence_array *array = to_dma_fence_array(fence); - /* XXX Error for signal-on-any fence arrays */ - child = array->fences; nchild = array->num_fences; GEM_BUG_ON(!nchild); @@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq, continue; } + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_request_add() which ensures + * that requests are submitted in-order through each ring. + */ if (fence->context == rq->fence.context) continue; - /* - * We don't squash repeated fence dependencies here as we - * want to run our callback in all cases. - */ + /* Squash repeated waits to the same timelines */ + if (fence->context && + intel_timeline_sync_is_later(i915_request_timeline(rq), + fence)) + continue; if (dma_fence_is_i915(fence)) - ret = __i915_request_await_execution(rq, - to_request(fence), - hook); + ret = i915_request_await_request(rq, to_request(fence)); else ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; + + /* Record the latest fence used against each timeline */ + if (fence->context) + intel_timeline_sync_set(i915_request_timeline(rq), + fence); } while (--nchild); return 0; -- cgit v1.2.3-70-g09d2 From 631a6582b75ffac82bee76a65217caa856fafb5e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 10:07:53 +0100 Subject: drm/i915/gt: Do not schedule normal requests immediately along virtual When we push a virtual request onto the HW, we update the rq->engine to point to the physical engine. A request that is then submitted by the user that waits upon the virtual engine, but along the physical engine in use, will then see that it is due to be submitted to the same engine and take a shortcut (and be queued without waiting for the completion fence). However, the virtual request may be preempted (either by higher priority users, or by timeslicing) and removed from the physical engine to be migrated over to one of its siblings. 
The dependent normal request however is oblivious to the removal of the virtual request and remains queued to execute on HW, believing that once it reaches the head of its queue all of its predecessors will have completed executing! v2: Beware restriction of signal->execution_mask prior to submission. Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine") Testcase: igt/gem_exec_balancer/sliced Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.3+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-2-chris@chris-wilson.co.uk (cherry picked from commit 511b6d9aed417739b6aa49d0b6b4354ad21020f1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 33bbad623e02..0b07ccc7e9bc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq, return 0; } +static int +await_request_submit(struct i915_request *to, struct i915_request *from) +{ + /* + * If we are waiting on a virtual engine, then it may be + * constrained to execute on a single engine *prior* to submission. + * When it is submitted, it will be first submitted to the virtual + * engine and then passed to the physical engine. We cannot allow + * the waiter to be submitted immediately to the physical engine + * as it may then bypass the virtual request. + */ + if (to->engine == READ_ONCE(from->engine)) + return i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + else + return __i915_request_await_execution(to, from, NULL); +} + static int i915_request_await_request(struct i915_request *to, struct i915_request *from) { @@ -1258,10 +1277,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (to->engine == READ_ONCE(from->engine)) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); + if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) + ret = await_request_submit(to, from); else ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); if (ret < 0) -- cgit v1.2.3-70-g09d2 From 0e386959272e4adf192867681a65cc0aa580c247 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 May 2020 15:39:26 +0100 Subject: drm/i915: Check for awaits on still currently executing requests With the advent of preempt-to-busy, a request may still be on the GPU as we unwind. And in the case of an unpreemptible [due to HW] request, that request will remain indefinitely on the GPU even though we have returned it to our submission queue, and cleared the active bit. We only run the execution callbacks on transferring the request from our submission queue to the execution queue, but if this is a bonded request that the HW is waiting for, we will not submit it (as we wait for a fresh execution) even though it is still being executed. As we know that there are always preemption points between requests, we know that only the currently executing request may still be active even though we have cleared the flag. However, we do not precisely know which request is in ELSP[0] due to a delay in processing events, and furthermore we only store the last request in a context in our state tracker.
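A userspace sketch of the check introduced below (all names assumed; the real code walks engine->execlists.active under RCU): scan the ports the HW may still be executing and report whether the signaler's context is in flight, using seqno comparison to see how far that context has advanced.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct request { uint32_t context; uint32_t seqno; };

static bool seqno_passed(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0; /* mirrors i915_seqno_passed() */
}

/* ports is a NULL-terminated snapshot of what the HW is running */
static bool request_in_flight(const struct request *const *ports,
                              const struct request *signal)
{
        for (; *ports; ports++) {
                if ((*ports)->context == signal->context)
                        return seqno_passed((*ports)->seqno, signal->seqno);
        }
        return false;
}

int main(void)
{
        const struct request active = { .context = 7, .seqno = 12 };
        const struct request signal = { .context = 7, .seqno = 10 };
        const struct request *ports[] = { &active, NULL };

        printf("in flight: %d\n", request_in_flight(ports, &signal)); /* 1 */
        return 0;
}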
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Testcase: igt/gem_exec_balancer/bonded-dual Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200529143926.3245-1-chris@chris-wilson.co.uk (cherry picked from commit b55230e5e800868961fc271b26d9ce53ae1f691e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 49 ++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b07ccc7e9bc..def62100e666 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -357,6 +357,53 @@ void i915_request_retire_upto(struct i915_request *rq) } while (i915_request_retire(tmp) && tmp != rq); } +static struct i915_request * const * +__engine_active(struct intel_engine_cs *engine) +{ + return READ_ONCE(engine->execlists.active); +} + +static bool __request_in_flight(const struct i915_request *signal) +{ + struct i915_request * const *port, *rq; + bool inflight = false; + + if (!i915_request_is_ready(signal)) + return false; + + /* + * Even if we have unwound the request, it may still be on + * the GPU (preempt-to-busy). If that request is inside an + * unpreemptible critical section, it will not be removed. Some + * GPU functions may even be stuck waiting for the paired request + * (__await_execution) to be submitted and cannot be preempted + * until the bond is executing. + * + * As we know that there are always preemption points between + * requests, we know that only the currently executing request + * may be still active even though we have cleared the flag. + * However, we can't rely on our tracking of ELSP[0] to known + * which request is currently active and so maybe stuck, as + * the tracking maybe an event behind. Instead assume that + * if the context is still inflight, then it is still active + * even if the active flag has been cleared. + */ + if (!intel_context_inflight(signal->context)) + return false; + + rcu_read_lock(); + for (port = __engine_active(signal->engine); (rq = *port); port++) { + if (rq->context == signal->context) { + inflight = i915_seqno_passed(rq->fence.seqno, + signal->fence.seqno); + break; + } + } + rcu_read_unlock(); + + return inflight; +} + static int __await_execution(struct i915_request *rq, struct i915_request *signal, @@ -387,7 +434,7 @@ __await_execution(struct i915_request *rq, } spin_lock_irq(&signal->lock); - if (i915_request_is_active(signal)) { + if (i915_request_is_active(signal) || __request_in_flight(signal)) { if (hook) { hook(rq, &signal->fence); i915_request_put(signal); -- cgit v1.2.3-70-g09d2 From 882f38b7f6c406e7229e72ee4328b7f1d5938ae7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 27 May 2020 23:02:45 +0300 Subject: drm/i915: Fix global state use-after-frees with a refcount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the current locking/serialization of the global state suffices for protecting the obj->state access and the actual hardware reprogramming, we do have a problem with accessing the old/new states during nonblocking commits. The state computation and swap will be protected by the crtc locks, but the commit_tails can finish out of order, thus also causing the atomic states to be cleaned up out of order. 
This would mean the commit that started first but finished last has had its new state freed as the no-longer-needed old state by the other commit. To fix this let's just refcount the states. obj->state amounts to one reference, and the intel_atomic_state holds extra references to both its new and old global obj states. Fixes: 0ef1905ecf2e ("drm/i915: Introduce better global state handling") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200527200245.13184-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit f8c86ffa2800adc80adc679c84c45e0c6b027374) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_global_state.c | 45 ++++++++++++++++++++--- drivers/gpu/drm/i915/display/intel_global_state.h | 3 ++ 2 files changed, 42 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 212d4ee68205..7a19215ad844 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -10,6 +10,28 @@ #include "intel_display_types.h" #include "intel_global_state.h" +static void __intel_atomic_global_state_free(struct kref *kref) +{ + struct intel_global_state *obj_state = + container_of(kref, struct intel_global_state, ref); + struct intel_global_obj *obj = obj_state->obj; + + obj->funcs->atomic_destroy_state(obj, obj_state); +} + +static void intel_atomic_global_state_put(struct intel_global_state *obj_state) +{ + kref_put(&obj_state->ref, __intel_atomic_global_state_free); +} + +static struct intel_global_state * +intel_atomic_global_state_get(struct intel_global_state *obj_state) +{ + kref_get(&obj_state->ref); + + return obj_state; +} + void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, struct intel_global_obj *obj, struct intel_global_state *state, @@ -17,6 +39,10 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, { memset(obj, 0, sizeof(*obj)); + state->obj = obj; + + kref_init(&state->ref); + obj->state = state; obj->funcs = funcs; list_add_tail(&obj->head, &dev_priv->global_obj_list); @@ -28,7 +54,9 @@ void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv) list_for_each_entry_safe(obj, next, &dev_priv->global_obj_list, head) { list_del(&obj->head); - obj->funcs->atomic_destroy_state(obj, obj->state); + + drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1); + intel_atomic_global_state_put(obj->state); } } @@ -97,10 +125,14 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state, if (!obj_state) return ERR_PTR(-ENOMEM); + obj_state->obj = obj; obj_state->changed = false; + kref_init(&obj_state->ref); + state->global_objs[index].state = obj_state; - state->global_objs[index].old_state = obj->state; + state->global_objs[index].old_state = + intel_atomic_global_state_get(obj->state); state->global_objs[index].new_state = obj_state; state->global_objs[index].ptr = obj; obj_state->state = state; @@ -163,7 +195,9 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state) new_obj_state->state = NULL; state->global_objs[i].state = old_obj_state; - obj->state = new_obj_state; + + intel_atomic_global_state_put(obj->state); + obj->state = intel_atomic_global_state_get(new_obj_state); } } @@ -172,10 +206,9 @@ void intel_atomic_clear_global_state(struct intel_atomic_state *state) int i; for (i = 0; i < state->num_global_objs; i++) { - struct intel_global_obj 
*obj = state->global_objs[i].ptr; + intel_atomic_global_state_put(state->global_objs[i].old_state); + intel_atomic_global_state_put(state->global_objs[i].new_state); - obj->funcs->atomic_destroy_state(obj, - state->global_objs[i].state); state->global_objs[i].ptr = NULL; state->global_objs[i].state = NULL; state->global_objs[i].old_state = NULL; diff --git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h index e6163a469029..1f16fa3073c9 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.h +++ b/drivers/gpu/drm/i915/display/intel_global_state.h @@ -6,6 +6,7 @@ #ifndef __INTEL_GLOBAL_STATE_H__ #define __INTEL_GLOBAL_STATE_H__ +#include <linux/kref.h> #include <linux/list.h> struct drm_i915_private; @@ -54,7 +55,9 @@ struct intel_global_obj { for_each_if(obj) struct intel_global_state { + struct intel_global_obj *obj; struct intel_atomic_state *state; + struct kref ref; bool changed; }; -- cgit v1.2.3-70-g09d2 From 273500ae71711c040d258a7b3f4b6f44c368fff2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 17:19:42 +0100 Subject: drm/i915: Whitelist context-local timestamp in the gen9 cmdparser Allow batch buffers to read their own _local_ cumulative HW runtime of their logical context. Fixes: 0f2f39758341 ("drm/i915: Add gen9 BCS cmdparsing") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601161942.30854-1-chris@chris-wilson.co.uk (cherry picked from commit f9496520df11de00fbafc3cbd693b9570d600ab3) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 189b573d02be..372354d33f55 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -572,6 +572,9 @@ struct drm_i915_reg_descriptor { #define REG32(_reg, ...) \ { .addr = (_reg), __VA_ARGS__ } +#define REG32_IDX(_reg, idx) \ + { .addr = _reg(idx) } + /* * Convenience macro for adding 64-bit registers. * @@ -669,6 +672,7 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG32(BCS_SWCTRL), REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE), REG64_IDX(BCS_GPR, 0), REG64_IDX(BCS_GPR, 1), REG64_IDX(BCS_GPR, 2), -- cgit v1.2.3-70-g09d2 From ea2b383ded0f84f310fe330eb43d758f2807e728 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:08 +0300 Subject: drm/i915/params: don't expose inject_probe_failure in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter only makes sense as a module parameter.
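For reference, the mechanism relied on here, sketched as a toy module (hypothetical demo code, not the i915 sources): the third argument of module_param() is the sysfs mode, where 0 creates no /sys/module/<name>/parameters/ entry at all and 0400/0600 expose the parameter read-only/read-write to root.

#include <linux/init.h>
#include <linux/module.h>

static unsigned int inject_probe_failure;
/* mode 0: settable at module load time only, never visible in sysfs */
module_param(inject_probe_failure, uint, 0);
MODULE_PARM_DESC(inject_probe_failure, "Fail the Nth probe step (demo)");

static int __init demo_init(void)
{
        pr_info("demo: inject_probe_failure=%u\n", inject_probe_failure);
        return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");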
Fixes: c43c5a8818d4 ("drm/i915/params: add i915 parameters to debugfs") Cc: Juha-Pekka Heikkilä Cc: Venkata Sandeep Dhanalakota Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-1-jani.nikula@intel.com (cherry picked from commit dbf4081ffb68c0d9b518a34c715a8d8681658411) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_params.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 45323732f099..4f21bfffbf0e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -64,7 +64,7 @@ struct drm_printer; param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \ param(int, edp_vswing, 0, 0400) \ param(unsigned int, reset, 3, 0600) \ - param(unsigned int, inject_probe_failure, 0, 0600) \ + param(unsigned int, inject_probe_failure, 0, 0) \ param(int, fastboot, -1, 0600) \ param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ -- cgit v1.2.3-70-g09d2 From 62b6e899a0ad4863b2e799f3c10a77116e3cfadc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:09 +0300 Subject: drm/i915/params: fix i915.fake_lmem_start module param sysfs permissions fake_lmem_start does not need to be mutable via module param sysfs. It's only used during driver probe. Fixes: 1629224324b6 ("drm/i915/lmem: add the fake lmem region") Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-2-jani.nikula@intel.com (cherry picked from commit f322e851f20e534cf5305332a9ad5eefadb55d56) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index add00ec1f787..a3dde770226d 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -173,7 +173,7 @@ i915_param_named(enable_gvt, bool, 0400, #endif #if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) -i915_param_named_unsafe(fake_lmem_start, ulong, 0600, +i915_param_named_unsafe(fake_lmem_start, ulong, 0400, "Fake LMEM start offset (default: 0)"); #endif -- cgit v1.2.3-70-g09d2
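Returning to the global-state use-after-free fix earlier in this log: the repair is the classic kref discipline, compact enough to model in userspace (an illustrative analogue, not the DRM code). The object's current state holds one reference and every atomic commit holds references to both its old and new states, so out-of-order cleanup can no longer free a state another commit still reads.

#include <stdio.h>
#include <stdlib.h>

struct state { int refcount; int payload; };

static struct state *state_get(struct state *s)
{
        s->refcount++;
        return s;
}

static void state_put(struct state *s)
{
        if (--s->refcount == 0) {
                printf("freeing state %d\n", s->payload);
                free(s);
        }
}

static struct state *state_alloc(int payload)
{
        struct state *s = calloc(1, sizeof(*s));

        s->payload = payload;
        return state_get(s); /* caller's reference */
}

int main(void)
{
        struct state *obj_state = state_alloc(1);     /* obj->state */
        struct state *new_state = state_alloc(2);     /* computed by a commit */
        struct state *old_ref = state_get(obj_state); /* commit's old_state ref */

        /* swap: the object releases the old state and adopts the new one */
        state_put(obj_state);
        obj_state = state_get(new_state);

        /* a commit finishing later can still read the old state safely */
        printf("old state still valid: %d\n", old_ref->payload);
        state_put(old_ref);   /* last reference: state 1 is freed here */

        state_put(new_state); /* drop the commit's own reference */
        state_put(obj_state); /* teardown: state 2 is freed here */
        return 0;
}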