author     Linus Torvalds <torvalds@linux-foundation.org>   2018-02-01 17:48:47 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-02-01 17:48:47 -0800
commit     4bf772b14675411a69b3c807f73006de0fe4b649 (patch)
tree       b841e3ba0e3429695589cb0ab73871fa12f42c38 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent     3879ae653a3e98380fe2daf653338830b7ca0097 (diff)
parent     24b8ef699e8221d2b7f813adaab13eec053e1507 (diff)
Merge tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
 "This seems to have been a comparatively quieter merge window, I assume
  due to holidays etc. The "biggest" change is AMD header cleanups, which
  merge/remove a bunch of them. The AMD gpu scheduler is now being made
  generic with the etnaviv driver wanting to reuse the code, hopefully
  other drivers can go in the same direction.

  Otherwise it's the usual lots of stuff in i915/amdgpu, not so much
  stuff elsewhere.

  Core:
   - Add .last_close and .output_poll_changed helpers to reduce driver
     footprints
   - Fix plane clipping
   - Improved debug printing support
   - Add panel orientation property
   - Update edid derived properties at edid setting
   - Reduction in fbdev driver footprint
   - Move amdgpu scheduler into core for other drivers to use

  i915:
   - Selftest and IGT improvements
   - Fast boot prep work on IPS, pipe config
   - HW workarounds for Cannonlake, Geminilake
   - Cannonlake clock and HDMI2.0 fixes
   - GPU cache invalidation and context switch improvements
   - Display planes cleanup
   - New PMU interface for perf queries
   - New firmware support for KBL/SKL
   - Geminilake HW workaround for performance
   - Coffeelake stolen memory improvements
   - GPU reset robustness work
   - Cannonlake horizontal plane flipping
   - GVT work

  amdgpu/radeon:
   - RV and Vega header file cleanups (lots of lines gone!)
   - TTM operation context support
   - 48-bit GPUVM support for Vega/RV
   - ECC support for Vega
   - Resizeable BAR support
   - Multi-display sync support
   - Enable swapout for reserved BOs during allocation
   - S3 fixes on Raven
   - GPU reset cleanup and fixes
   - 2+1 level GPU page table

  amdkfd:
   - GFX7/8 SDMA user queues support
   - Hardware scheduling for multiple processes
   - dGPU prep work

  rcar:
   - Added R8A7743/5 support
   - System suspend/resume support

  sun4i:
   - Multi-plane support for YUV formats
   - A83T and LVDS support

  msm:
   - Devfreq support for GPU

  tegra:
   - Prep work for adding Tegra186 support
   - Tegra186 HDMI support
   - HDMI2.0 and zpos support by using generic helpers

  tilcdc:
   - Misc fixes

  omapdrm:
   - Support memory bandwidth limits
   - DSI command mode panel cleanups
   - DMM error handling

  exynos:
   - Drop the old IPP subdriver

  etnaviv:
   - Occlusion query fixes
   - Job handling fixes
   - Prep work for hooking in gpu scheduler

  armada:
   - Move closer to atomic modesetting
   - Allow disabling primary plane if overlay is full screen

  imx:
   - Format modifier support
   - Add tile prefetch to PRE
   - Runtime PM support for PRG

  ast:
   - Fix LUT loading"

* tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux: (1471 commits)
  drm/ast: Load lut in crtc_commit
  drm: Check for lessee in DROP_MASTER ioctl
  drm: fix gpu scheduler link order
  drm/amd/display: Demote error print to debug print when ATOM impl missing
  dma-buf: fix reservation_object_wait_timeout_rcu once more v2
  drm/amdgpu: Avoid leaking PM domain on driver unbind (v2)
  drm/amd/amdgpu: Add Polaris version check
  drm/amdgpu: Reenable manual GPU reset from sysfs
  drm/amdgpu: disable MMHUB power gating on raven
  drm/ttm: Don't unreserve swapped BOs that were previously reserved
  drm/ttm: Don't add swapped BOs to swap-LRU list
  drm/amdgpu: only check for ECC on Vega10
  drm/amd/powerplay: Fix smu_table_entry.handle type
  drm/ttm: add VADDR_FLAG_UPDATED_COUNT to correctly update dma_page global count
  drm: Fix PANEL_ORIENTATION_QUIRKS breaking the Kconfig DRM menuconfig
  drm/radeon: fill in rb backend map on evergreen/ni.
  drm/amdgpu/gfx9: fix ngg enablement to clear gds reserved memory (v2)
  drm/ttm: only free pages rather than update global memory count together
  drm/amdgpu: fix CPU based VM updates
  drm/amdgpu: fix typo in amdgpu_vce_validate_bo
  ...
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
 -rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 747
 1 file changed, 339 insertions(+), 408 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8da1bde442dd..e2085820b586 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -28,9 +28,12 @@
*/
#include <linux/log2.h>
+
#include <drm/drmP.h>
-#include "i915_drv.h"
#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem_render_state.h"
#include "i915_trace.h"
#include "intel_drv.h"
@@ -337,50 +340,6 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
return 0;
}
-static int
-gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
-{
- u32 flags;
- u32 *cs;
-
- cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- flags = PIPE_CONTROL_CS_STALL;
-
- if (mode & EMIT_FLUSH) {
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
- }
- if (mode & EMIT_INVALIDATE) {
- flags |= PIPE_CONTROL_TLB_INVALIDATE;
- flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
- /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
- cs = gen8_emit_pipe_control(cs,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD,
- 0);
- }
-
- cs = gen8_emit_pipe_control(cs, flags,
- i915_ggtt_offset(req->engine->scratch) +
- 2 * CACHELINE_BYTES);
-
- intel_ring_advance(req, cs);
-
- return 0;
-}
-
static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -424,7 +383,6 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
} else if (IS_GEN6(dev_priv)) {
mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
- /* XXX: gen8 returns to sanity */
mmio = RING_HWS_PGA(engine->mmio_base);
}
@@ -434,13 +392,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
I915_WRITE(mmio, engine->status_page.ggtt_offset);
POSTING_READ(mmio);
- /*
- * Flush the TLB for this page
- *
- * FIXME: These two bits have disappeared on gen8, so a question
- * arises: do we still need this and if so how should we go about
- * invalidating the TLB?
- */
+ /* Flush the TLB for this page */
if (IS_GEN(dev_priv, 6, 7)) {
i915_reg_t reg = RING_INSTPM(engine->mmio_base);
@@ -480,10 +432,14 @@ static bool stop_ring(struct intel_engine_cs *engine)
}
}
- I915_WRITE_CTL(engine, 0);
+ I915_WRITE_HEAD(engine, I915_READ_TAIL(engine));
+
I915_WRITE_HEAD(engine, 0);
I915_WRITE_TAIL(engine, 0);
+ /* The ring must be empty before it is disabled */
+ I915_WRITE_CTL(engine, 0);
+
return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
}
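Note the ordering stop_ring() ends up with after this hunk: HEAD is first snapped to TAIL so the ring reads back as empty, both registers are then zeroed, and only afterwards is RING_CTL cleared, per the new comment that the ring must be empty before it is disabled. The post-patch sequence, reconstructed from the hunk:

    I915_WRITE_HEAD(engine, I915_READ_TAIL(engine)); /* HEAD == TAIL: ring reads as empty */

    I915_WRITE_HEAD(engine, 0);
    I915_WRITE_TAIL(engine, 0);

    /* The ring must be empty before it is disabled */
    I915_WRITE_CTL(engine, 0);

    return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;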
@@ -604,8 +560,6 @@ static void reset_ring_common(struct intel_engine_cs *engine,
struct intel_context *ce = &request->ctx->engine[engine->id];
struct i915_hw_ppgtt *ppgtt;
- /* FIXME consider gen8 reset */
-
if (ce->state) {
I915_WRITE(CCID,
i915_ggtt_offset(ce->state) |
@@ -637,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine,
request->ring->head = request->postfix;
} else {
engine->legacy_active_context = NULL;
+ engine->legacy_active_ppgtt = NULL;
}
}
@@ -706,62 +661,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
return init_workarounds_ring(engine);
}
-static void render_ring_cleanup(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- i915_vma_unpin_and_release(&dev_priv->semaphore);
-}
-
-static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
-{
- struct drm_i915_private *dev_priv = req->i915;
- struct intel_engine_cs *waiter;
- enum intel_engine_id id;
-
- for_each_engine(waiter, dev_priv, id) {
- u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
- if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
- continue;
-
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_CS_STALL;
- *cs++ = lower_32_bits(gtt_offset);
- *cs++ = upper_32_bits(gtt_offset);
- *cs++ = req->global_seqno;
- *cs++ = 0;
- *cs++ = MI_SEMAPHORE_SIGNAL |
- MI_SEMAPHORE_TARGET(waiter->hw_id);
- *cs++ = 0;
- }
-
- return cs;
-}
-
-static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
-{
- struct drm_i915_private *dev_priv = req->i915;
- struct intel_engine_cs *waiter;
- enum intel_engine_id id;
-
- for_each_engine(waiter, dev_priv, id) {
- u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
- if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
- continue;
-
- *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
- *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
- *cs++ = upper_32_bits(gtt_offset);
- *cs++ = req->global_seqno;
- *cs++ = MI_SEMAPHORE_SIGNAL |
- MI_SEMAPHORE_TARGET(waiter->hw_id);
- *cs++ = 0;
- }
-
- return cs;
-}
-
static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
{
struct drm_i915_private *dev_priv = req->i915;
@@ -844,70 +743,6 @@ static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
req->engine->semaphore.signal(req, cs));
}
-static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
- u32 *cs)
-{
- struct intel_engine_cs *engine = req->engine;
-
- if (engine->semaphore.signal)
- cs = engine->semaphore.signal(req, cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE;
- *cs++ = intel_hws_seqno_address(engine);
- *cs++ = 0;
- *cs++ = req->global_seqno;
- /* We're thrashing one dword of HWS. */
- *cs++ = 0;
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- req->tail = intel_ring_offset(req, cs);
- assert_ring_tail_valid(req->ring, req->tail);
-}
-
-static const int gen8_render_emit_breadcrumb_sz = 8;
-
-/**
- * intel_ring_sync - sync the waiter to the signaller on seqno
- *
- * @waiter - ring that is waiting
- * @signaller - ring which has, or will signal
- * @seqno - seqno which the waiter will block on
- */
-
-static int
-gen8_ring_sync_to(struct drm_i915_gem_request *req,
- struct drm_i915_gem_request *signal)
-{
- struct drm_i915_private *dev_priv = req->i915;
- u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
- struct i915_hw_ppgtt *ppgtt;
- u32 *cs;
-
- cs = intel_ring_begin(req, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
- MI_SEMAPHORE_SAD_GTE_SDD;
- *cs++ = signal->global_seqno;
- *cs++ = lower_32_bits(offset);
- *cs++ = upper_32_bits(offset);
- intel_ring_advance(req, cs);
-
- /* When the !RCS engines idle waiting upon a semaphore, they lose their
- * pagetables and we must reload them before executing the batch.
- * We do this on the i915_switch_context() following the wait and
- * before the dispatch.
- */
- ppgtt = req->ctx->ppgtt;
- if (ppgtt && req->engine->id != RCS)
- ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
- return 0;
-}
-
static int
gen6_ring_sync_to(struct drm_i915_gem_request *req,
struct drm_i915_gem_request *signal)
@@ -1083,25 +918,6 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine)
gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
}
-static void
-gen8_irq_enable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- I915_WRITE_IMR(engine,
- ~(engine->irq_enable_mask |
- engine->irq_keep_mask));
- POSTING_READ_FW(RING_IMR(engine->mmio_base));
-}
-
-static void
-gen8_irq_disable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
-}
-
static int
i965_emit_bb_start(struct drm_i915_gem_request *req,
u64 offset, u32 length,
@@ -1359,12 +1175,13 @@ static int context_pin(struct i915_gem_context *ctx)
struct i915_vma *vma = ctx->engine[RCS].state;
int ret;
- /* Clear this page out of any CPU caches for coherent swap-in/out.
+ /*
+ * Clear this page out of any CPU caches for coherent swap-in/out.
* We only want to do this on the first bind so that we do not stall
* on an active context (which by nature is already on the GPU).
*/
if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
- ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+ ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
if (ret)
return ret;
}
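The one functional change in this hunk is the second argument of i915_gem_object_set_to_gtt_domain() flipping from false to true: the context image is now moved to the GTT domain for write, which plausibly also marks the backing pages dirty, and would explain why the manual ce->state->obj->mm.dirty = true assignment is dropped from intel_ring_context_pin() further down. A sketch of that idea, using simplified, hypothetical helpers rather than the real i915 internals:

    /* Hypothetical sketch, not the real helper: moving an object to a
     * domain "for write" both flushes stale CPU cachelines and marks
     * the pages dirty, so they are written back on a later swap-out. */
    static int set_to_gtt_domain(struct gem_object *obj, bool write)
    {
            flush_cpu_cachelines(obj);      /* coherent swap-in/out */
            if (write)
                    obj->mm.dirty = true;   /* subsumes the manual flag */
            return 0;
    }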
@@ -1379,11 +1196,34 @@ alloc_context_vma(struct intel_engine_cs *engine)
struct drm_i915_private *i915 = engine->i915;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ int err;
obj = i915_gem_object_create(i915, engine->context_size);
if (IS_ERR(obj))
return ERR_CAST(obj);
+ if (engine->default_state) {
+ void *defaults, *vaddr;
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_obj;
+ }
+
+ defaults = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(defaults)) {
+ err = PTR_ERR(defaults);
+ goto err_map;
+ }
+
+ memcpy(vaddr, defaults, engine->context_size);
+
+ i915_gem_object_unpin_map(engine->default_state);
+ i915_gem_object_unpin_map(obj);
+ }
+
/*
* Try to make the context utilize L3 as well as LLC.
*
@@ -1405,10 +1245,18 @@ alloc_context_vma(struct intel_engine_cs *engine)
}
vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
- if (IS_ERR(vma))
- i915_gem_object_put(obj);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
return vma;
+
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
}
static struct intel_ring *
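alloc_context_vma() now copies the engine's default_state into the freshly created context object, and gains the canonical kernel goto-unwind error handling: each failure jumps to the label that releases exactly what has been acquired so far, in reverse order (err_map unpins the mapping, then falls through to err_obj, which drops the object). On success both pin_map() mappings are released right after the memcpy, so the labels run only on failure. Condensed from the hunk above, the ladder has this shape:

    vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
    if (IS_ERR(vaddr)) {
            err = PTR_ERR(vaddr);
            goto err_obj;                   /* only the object is held */
    }
    defaults = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB);
    if (IS_ERR(defaults)) {
            err = PTR_ERR(defaults);
            goto err_map;                   /* must also undo the first pin */
    }
    /* ... */
    err_map:
            i915_gem_object_unpin_map(obj);
    err_obj:
            i915_gem_object_put(obj);
            return ERR_PTR(err);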
@@ -1441,20 +1289,9 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
if (ret)
goto err;
- ce->state->obj->mm.dirty = true;
ce->state->obj->pin_global++;
}
- /* The kernel context is only used as a placeholder for flushing the
- * active context. It is never used for submitting user rendering and
- * as such never requires the golden render context, and so we can skip
- * emitting it when we switch to the kernel context. This is required
- * as during eviction we cannot allocate and pin the renderstate in
- * order to initialise the context.
- */
- if (i915_gem_context_is_kernel(ctx))
- ce->initialised = true;
-
i915_gem_context_get(ctx);
out:
@@ -1548,10 +1385,194 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
intel_ring_reset(engine->buffer, 0);
}
-static int ring_request_alloc(struct drm_i915_gem_request *request)
+static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags)
{
+ struct drm_i915_private *i915 = rq->i915;
+ struct intel_engine_cs *engine = rq->engine;
+ enum intel_engine_id id;
+ const int num_rings =
+ /* Use an extended w/a on gen7 if signalling from other rings */
+ (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ?
+ INTEL_INFO(i915)->num_rings - 1 :
+ 0;
+ int len;
u32 *cs;
+ flags |= MI_MM_SPACE_GTT;
+ if (IS_HASWELL(i915))
+ /* These flags are for resource streamer on HSW+ */
+ flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
+ else
+ flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
+
+ len = 4;
+ if (IS_GEN7(i915))
+ len += 2 + (num_rings ? 4*num_rings + 6 : 0);
+
+ cs = intel_ring_begin(rq, len);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+ if (IS_GEN7(i915)) {
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ if (num_rings) {
+ struct intel_engine_cs *signaller;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(num_rings);
+ for_each_engine(signaller, i915, id) {
+ if (signaller == engine)
+ continue;
+
+ *cs++ = i915_mmio_reg_offset(
+ RING_PSMI_CTL(signaller->mmio_base));
+ *cs++ = _MASKED_BIT_ENABLE(
+ GEN6_PSMI_SLEEP_MSG_DISABLE);
+ }
+ }
+ }
+
+ *cs++ = MI_NOOP;
+ *cs++ = MI_SET_CONTEXT;
+ *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags;
+ /*
+ * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
+ * WaMiSetContext_Hang:snb,ivb,vlv
+ */
+ *cs++ = MI_NOOP;
+
+ if (IS_GEN7(i915)) {
+ if (num_rings) {
+ struct intel_engine_cs *signaller;
+ i915_reg_t last_reg = {}; /* keep gcc quiet */
+
+ *cs++ = MI_LOAD_REGISTER_IMM(num_rings);
+ for_each_engine(signaller, i915, id) {
+ if (signaller == engine)
+ continue;
+
+ last_reg = RING_PSMI_CTL(signaller->mmio_base);
+ *cs++ = i915_mmio_reg_offset(last_reg);
+ *cs++ = _MASKED_BIT_DISABLE(
+ GEN6_PSMI_SLEEP_MSG_DISABLE);
+ }
+
+ /* Insert a delay before the next switch! */
+ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+ *cs++ = i915_mmio_reg_offset(last_reg);
+ *cs++ = i915_ggtt_offset(engine->scratch);
+ *cs++ = MI_NOOP;
+ }
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int remap_l3(struct drm_i915_gem_request *rq, int slice)
+{
+ u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
+ int i;
+
+ if (!remap_info)
+ return 0;
+
+ cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /*
+ * Note: We do not worry about the concurrent register cacheline hang
+ * here because no other code should access these registers other than
+ * at initialization time.
+ */
+ *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
+ *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = remap_info[i];
+ }
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int switch_context(struct drm_i915_gem_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ struct i915_gem_context *to_ctx = rq->ctx;
+ struct i915_hw_ppgtt *to_mm =
+ to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+ struct i915_gem_context *from_ctx = engine->legacy_active_context;
+ struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt;
+ u32 hw_flags = 0;
+ int ret, i;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+ GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+ if (to_mm != from_mm ||
+ (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) {
+ trace_switch_mm(engine, to_ctx);
+ ret = to_mm->switch_mm(to_mm, rq);
+ if (ret)
+ goto err;
+
+ to_mm->pd_dirty_rings &= ~intel_engine_flag(engine);
+ engine->legacy_active_ppgtt = to_mm;
+ hw_flags = MI_FORCE_RESTORE;
+ }
+
+ if (to_ctx->engine[engine->id].state &&
+ (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
+ GEM_BUG_ON(engine->id != RCS);
+
+ /*
+ * The kernel context(s) is treated as pure scratch and is not
+ * expected to retain any state (as we sacrifice it during
+ * suspend and on resume it may be corrupted). This is ok,
+ * as nothing actually executes using the kernel context; it
+ * is purely used for flushing user contexts.
+ */
+ if (i915_gem_context_is_kernel(to_ctx))
+ hw_flags = MI_RESTORE_INHIBIT;
+
+ ret = mi_set_context(rq, hw_flags);
+ if (ret)
+ goto err_mm;
+
+ engine->legacy_active_context = to_ctx;
+ }
+
+ if (to_ctx->remap_slice) {
+ for (i = 0; i < MAX_L3_SLICES; i++) {
+ if (!(to_ctx->remap_slice & BIT(i)))
+ continue;
+
+ ret = remap_l3(rq, i);
+ if (ret)
+ goto err_ctx;
+ }
+
+ to_ctx->remap_slice = 0;
+ }
+
+ return 0;
+
+err_ctx:
+ engine->legacy_active_context = from_ctx;
+err_mm:
+ engine->legacy_active_ppgtt = from_mm;
+err:
+ return ret;
+}
+
+static int ring_request_alloc(struct drm_i915_gem_request *request)
+{
+ int ret;
+
GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
/* Flush enough space to reduce the likelihood of waiting after
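One idiom worth calling out in the new switch_context(): the target address space is chosen with to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt. That is GCC's binary conditional operator, common in the kernel: a ?: b yields a if a is non-NULL (evaluating it only once), otherwise b. Spelled out in plain C:

    struct i915_hw_ppgtt *to_mm = to_ctx->ppgtt;
    if (!to_mm)
            to_mm = rq->i915->mm.aliasing_ppgtt;   /* fall back to the aliasing ppgtt */

Note also that the err_ctx/err_mm unwind at the bottom restores legacy_active_context and legacy_active_ppgtt to their previous values, so a failed switch leaves the engine's cached state consistent with what the hardware last executed.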
@@ -1560,37 +1581,28 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
*/
request->reserved_space += LEGACY_REQUEST_SIZE;
- cs = intel_ring_begin(request, 0);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
+ ret = intel_ring_wait_for_space(request->ring, request->reserved_space);
+ if (ret)
+ return ret;
+
+ ret = switch_context(request);
+ if (ret)
+ return ret;
request->reserved_space -= LEGACY_REQUEST_SIZE;
return 0;
}
-static noinline int wait_for_space(struct drm_i915_gem_request *req,
- unsigned int bytes)
+static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_gem_request *target;
long timeout;
- lockdep_assert_held(&req->i915->drm.struct_mutex);
+ lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
if (intel_ring_update_space(ring) >= bytes)
return 0;
- /*
- * Space is reserved in the ringbuffer for finalising the request,
- * as that cannot be allowed to fail. During request finalisation,
- * reserved_space is set to 0 to stop the overallocation and the
- * assumption is that then we never need to wait (which has the
- * risk of failing with EINTR).
- *
- * See also i915_gem_request_alloc() and i915_add_request().
- */
- GEM_BUG_ON(!req->reserved_space);
-
list_for_each_entry(target, &ring->request_list, ring_link) {
/* Would completion of this request free enough space? */
if (bytes <= __intel_ring_space(target->postfix,
@@ -1614,6 +1626,22 @@ static noinline int wait_for_space(struct drm_i915_gem_request *req,
return 0;
}
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
+{
+ GEM_BUG_ON(bytes > ring->effective_size);
+ if (unlikely(bytes > ring->effective_size - ring->emit))
+ bytes += ring->size - ring->emit;
+
+ if (unlikely(bytes > ring->space)) {
+ int ret = wait_for_space(ring, bytes);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ GEM_BUG_ON(ring->space < bytes);
+ return 0;
+}
+
u32 *intel_ring_begin(struct drm_i915_gem_request *req,
unsigned int num_dwords)
{
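The new intel_ring_wait_for_space() makes the wrap accounting explicit: a request must be emitted contiguously, so if it does not fit between the current emit offset and the end of the usable ring (effective_size is slightly smaller than size; the driver keeps some slack), the unused tail has to be skipped as well, and that tail counts against the space waited for. A worked example with made-up numbers:

    /* Illustrative numbers only */
    unsigned int size = 4096, effective_size = 4032;
    unsigned int emit = 4000, bytes = 100;

    if (bytes > effective_size - emit)      /* 100 > 32: must wrap */
            bytes += size - emit;           /* also need the 96-byte tail: 196 total */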
@@ -1653,7 +1681,20 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req,
}
if (unlikely(total_bytes > ring->space)) {
- int ret = wait_for_space(req, total_bytes);
+ int ret;
+
+ /*
+ * Space is reserved in the ringbuffer for finalising the
+ * request, as that cannot be allowed to fail. During request
+ * finalisation, reserved_space is set to 0 to stop the
+ * overallocation and the assumption is that then we never need
+ * to wait (which has the risk of failing with EINTR).
+ *
+ * See also i915_gem_request_alloc() and i915_add_request().
+ */
+ GEM_BUG_ON(!req->reserved_space);
+
+ ret = wait_for_space(ring, total_bytes);
if (unlikely(ret))
return ERR_PTR(ret);
}
@@ -1748,8 +1789,6 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
return PTR_ERR(cs);
cmd = MI_FLUSH_DW;
- if (INTEL_GEN(req->i915) >= 8)
- cmd += 1;
/* We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
@@ -1769,38 +1808,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
*cs++ = cmd;
*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- if (INTEL_GEN(req->i915) >= 8) {
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
- } else {
- *cs++ = 0;
- *cs++ = MI_NOOP;
- }
- intel_ring_advance(req, cs);
- return 0;
-}
-
-static int
-gen8_emit_bb_start(struct drm_i915_gem_request *req,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- bool ppgtt = USES_PPGTT(req->i915) &&
- !(dispatch_flags & I915_DISPATCH_SECURE);
- u32 *cs;
-
- cs = intel_ring_begin(req, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* FIXME(BDW): Address space and security selectors. */
- *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
- I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
- *cs++ = lower_32_bits(offset);
- *cs++ = upper_32_bits(offset);
+ *cs++ = 0;
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
-
return 0;
}
@@ -1857,8 +1867,6 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
return PTR_ERR(cs);
cmd = MI_FLUSH_DW;
- if (INTEL_GEN(req->i915) >= 8)
- cmd += 1;
/* We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
@@ -1877,13 +1885,8 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
cmd |= MI_INVALIDATE_TLB;
*cs++ = cmd;
*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- if (INTEL_GEN(req->i915) >= 8) {
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
- } else {
- *cs++ = 0;
- *cs++ = MI_NOOP;
- }
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
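Both MI_FLUSH_DW emitters (gen6_bsd_ring_flush and gen6_ring_flush) lose their gen8 special case here: the removed cmd += 1 grew the command's dword-length field by one to make room for the upper half of a 64-bit post-sync address, which pre-gen8 hardware does not take. With gen8+ now served exclusively by execlists (see the GEM_BUG_ON added to intel_ring_default_vfuncs below), the packet is always the shorter form, reconstructed from the hunk:

    *cs++ = cmd;                            /* MI_FLUSH_DW, possibly | MI_INVALIDATE_TLB */
    *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;  /* 32-bit GTT address */
    *cs++ = 0;                              /* value */
    *cs++ = MI_NOOP;                        /* pad */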
@@ -1892,110 +1895,61 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine)
{
- struct drm_i915_gem_object *obj;
- int ret, i;
+ int i;
- if (!i915_modparams.semaphores)
+ if (!HAS_LEGACY_SEMAPHORES(dev_priv))
return;
- if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
- struct i915_vma *vma;
-
- obj = i915_gem_object_create(dev_priv, PAGE_SIZE);
- if (IS_ERR(obj))
- goto err;
-
- vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
- if (IS_ERR(vma))
- goto err_obj;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, false);
- if (ret)
- goto err_obj;
-
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
- if (ret)
- goto err_obj;
-
- dev_priv->semaphore = vma;
- }
-
- if (INTEL_GEN(dev_priv) >= 8) {
- u32 offset = i915_ggtt_offset(dev_priv->semaphore);
-
- engine->semaphore.sync_to = gen8_ring_sync_to;
- engine->semaphore.signal = gen8_xcs_signal;
-
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- u32 ring_offset;
-
- if (i != engine->id)
- ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
- else
- ring_offset = MI_SEMAPHORE_SYNC_INVALID;
-
- engine->semaphore.signal_ggtt[i] = ring_offset;
- }
- } else if (INTEL_GEN(dev_priv) >= 6) {
- engine->semaphore.sync_to = gen6_ring_sync_to;
- engine->semaphore.signal = gen6_signal;
+ GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+ engine->semaphore.sync_to = gen6_ring_sync_to;
+ engine->semaphore.signal = gen6_signal;
- /*
- * The current semaphore is only applied on pre-gen8
- * platform. And there is no VCS2 ring on the pre-gen8
- * platform. So the semaphore between RCS and VCS2 is
- * initialized as INVALID. Gen8 will initialize the
- * sema between VCS2 and RCS later.
- */
- for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
- static const struct {
- u32 wait_mbox;
- i915_reg_t mbox_reg;
- } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
- [RCS_HW] = {
- [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
- [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
- [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
- },
- [VCS_HW] = {
- [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
- [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
- [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
- },
- [BCS_HW] = {
- [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
- [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
- [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
- },
- [VECS_HW] = {
- [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
- [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
- [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
- },
- };
+ /*
+ * The current semaphore is only applied on pre-gen8
+ * platform. And there is no VCS2 ring on the pre-gen8
+ * platform. So the semaphore between RCS and VCS2 is
+ * initialized as INVALID.
+ */
+ for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
+ static const struct {
u32 wait_mbox;
i915_reg_t mbox_reg;
+ } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
+ [RCS_HW] = {
+ [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
+ [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
+ [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
+ },
+ [VCS_HW] = {
+ [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
+ [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
+ [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
+ },
+ [BCS_HW] = {
+ [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
+ [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
+ [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
+ },
+ [VECS_HW] = {
+ [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
+ [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
+ [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
+ },
+ };
+ u32 wait_mbox;
+ i915_reg_t mbox_reg;
- if (i == engine->hw_id) {
- wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
- mbox_reg = GEN6_NOSYNC;
- } else {
- wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
- mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
- }
-
- engine->semaphore.mbox.wait[i] = wait_mbox;
- engine->semaphore.mbox.signal[i] = mbox_reg;
+ if (i == engine->hw_id) {
+ wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
+ mbox_reg = GEN6_NOSYNC;
+ } else {
+ wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
+ mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
}
- }
-
- return;
-err_obj:
- i915_gem_object_put(obj);
-err:
- DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
- i915_modparams.semaphores = 0;
+ engine->semaphore.mbox.wait[i] = wait_mbox;
+ engine->semaphore.mbox.signal[i] = mbox_reg;
+ }
}
static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
@@ -2003,11 +1957,7 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
{
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
- if (INTEL_GEN(dev_priv) >= 8) {
- engine->irq_enable = gen8_irq_enable;
- engine->irq_disable = gen8_irq_disable;
- engine->irq_seqno_barrier = gen6_seqno_barrier;
- } else if (INTEL_GEN(dev_priv) >= 6) {
+ if (INTEL_GEN(dev_priv) >= 6) {
engine->irq_enable = gen6_irq_enable;
engine->irq_disable = gen6_irq_disable;
engine->irq_seqno_barrier = gen6_seqno_barrier;
@@ -2028,17 +1978,23 @@ static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = i9xx_submit_request;
engine->cancel_requests = cancel_requests;
+
+ engine->park = NULL;
+ engine->unpark = NULL;
}
static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
+ i9xx_set_default_submission(engine);
engine->submit_request = gen6_bsd_submit_request;
- engine->cancel_requests = cancel_requests;
}
static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine)
{
+ /* gen8+ are only supported with execlists */
+ GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+
intel_ring_init_irq(dev_priv, engine);
intel_ring_init_semaphores(dev_priv, engine);
@@ -2052,26 +2008,20 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->emit_breadcrumb = i9xx_emit_breadcrumb;
engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
- if (i915_modparams.semaphores) {
+ if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
int num_rings;
engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
- if (INTEL_GEN(dev_priv) >= 8) {
- engine->emit_breadcrumb_sz += num_rings * 6;
- } else {
- engine->emit_breadcrumb_sz += num_rings * 3;
- if (num_rings & 1)
- engine->emit_breadcrumb_sz++;
- }
+ engine->emit_breadcrumb_sz += num_rings * 3;
+ if (num_rings & 1)
+ engine->emit_breadcrumb_sz++;
}
engine->set_default_submission = i9xx_set_default_submission;
- if (INTEL_GEN(dev_priv) >= 8)
- engine->emit_bb_start = gen8_emit_bb_start;
- else if (INTEL_GEN(dev_priv) >= 6)
+ if (INTEL_GEN(dev_priv) >= 6)
engine->emit_bb_start = gen6_emit_bb_start;
else if (INTEL_GEN(dev_priv) >= 4)
engine->emit_bb_start = i965_emit_bb_start;
@@ -2091,20 +2041,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
if (HAS_L3_DPF(dev_priv))
engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
- if (INTEL_GEN(dev_priv) >= 8) {
- engine->init_context = intel_rcs_ctx_init;
- engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
- engine->emit_flush = gen8_render_ring_flush;
- if (i915_modparams.semaphores) {
- int num_rings;
-
- engine->semaphore.signal = gen8_rcs_signal;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
- engine->emit_breadcrumb_sz += num_rings * 8;
- }
- } else if (INTEL_GEN(dev_priv) >= 6) {
+ if (INTEL_GEN(dev_priv) >= 6) {
engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush;
if (IS_GEN6(dev_priv))
@@ -2123,7 +2060,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
engine->emit_bb_start = hsw_emit_bb_start;
engine->init_hw = init_render_ring;
- engine->cleanup = render_ring_cleanup;
ret = intel_init_ring_buffer(engine);
if (ret)
@@ -2153,8 +2089,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
if (IS_GEN6(dev_priv))
engine->set_default_submission = gen6_bsd_set_default_submission;
engine->emit_flush = gen6_bsd_ring_flush;
- if (INTEL_GEN(dev_priv) < 8)
- engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+ engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
} else {
engine->mmio_base = BSD_RING_BASE;
engine->emit_flush = bsd_ring_flush;
@@ -2174,8 +2109,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
intel_ring_default_vfuncs(dev_priv, engine);
engine->emit_flush = gen6_ring_flush;
- if (INTEL_GEN(dev_priv) < 8)
- engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+ engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
return intel_init_ring_buffer(engine);
}
@@ -2187,12 +2121,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
intel_ring_default_vfuncs(dev_priv, engine);
engine->emit_flush = gen6_ring_flush;
-
- if (INTEL_GEN(dev_priv) < 8) {
- engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
- engine->irq_enable = hsw_vebox_irq_enable;
- engine->irq_disable = hsw_vebox_irq_disable;
- }
+ engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+ engine->irq_enable = hsw_vebox_irq_enable;
+ engine->irq_disable = hsw_vebox_irq_disable;
return intel_init_ring_buffer(engine);
}