summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_engine_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c')
-rw-r--r--drivers/gpu/drm/i915/intel_engine_cs.c796
1 files changed, 105 insertions, 691 deletions
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4ba139c27fba..1a8370779bbb 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -81,13 +81,17 @@ static const struct engine_class_info intel_engine_classes[] = {
},
};
+#define MAX_MMIO_BASES 3
struct engine_info {
unsigned int hw_id;
unsigned int uabi_id;
u8 class;
u8 instance;
- u32 mmio_base;
- unsigned irq_shift;
+ /* mmio bases table *must* be sorted in reverse gen order */
+ struct engine_mmio_base {
+ u32 gen : 8;
+ u32 base : 24;
+ } mmio_bases[MAX_MMIO_BASES];
};
static const struct engine_info intel_engines[] = {
@@ -96,64 +100,76 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_RENDER,
.class = RENDER_CLASS,
.instance = 0,
- .mmio_base = RENDER_RING_BASE,
- .irq_shift = GEN8_RCS_IRQ_SHIFT,
+ .mmio_bases = {
+ { .gen = 1, .base = RENDER_RING_BASE }
+ },
},
[BCS] = {
.hw_id = BCS_HW,
.uabi_id = I915_EXEC_BLT,
.class = COPY_ENGINE_CLASS,
.instance = 0,
- .mmio_base = BLT_RING_BASE,
- .irq_shift = GEN8_BCS_IRQ_SHIFT,
+ .mmio_bases = {
+ { .gen = 6, .base = BLT_RING_BASE }
+ },
},
[VCS] = {
.hw_id = VCS_HW,
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 0,
- .mmio_base = GEN6_BSD_RING_BASE,
- .irq_shift = GEN8_VCS1_IRQ_SHIFT,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD_RING_BASE },
+ { .gen = 6, .base = GEN6_BSD_RING_BASE },
+ { .gen = 4, .base = BSD_RING_BASE }
+ },
},
[VCS2] = {
.hw_id = VCS2_HW,
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 1,
- .mmio_base = GEN8_BSD2_RING_BASE,
- .irq_shift = GEN8_VCS2_IRQ_SHIFT,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD2_RING_BASE },
+ { .gen = 8, .base = GEN8_BSD2_RING_BASE }
+ },
},
[VCS3] = {
.hw_id = VCS3_HW,
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 2,
- .mmio_base = GEN11_BSD3_RING_BASE,
- .irq_shift = 0, /* not used */
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD3_RING_BASE }
+ },
},
[VCS4] = {
.hw_id = VCS4_HW,
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 3,
- .mmio_base = GEN11_BSD4_RING_BASE,
- .irq_shift = 0, /* not used */
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD4_RING_BASE }
+ },
},
[VECS] = {
.hw_id = VECS_HW,
.uabi_id = I915_EXEC_VEBOX,
.class = VIDEO_ENHANCEMENT_CLASS,
.instance = 0,
- .mmio_base = VEBOX_RING_BASE,
- .irq_shift = GEN8_VECS_IRQ_SHIFT,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
+ { .gen = 7, .base = VEBOX_RING_BASE }
+ },
},
[VECS2] = {
.hw_id = VECS2_HW,
.uabi_id = I915_EXEC_VEBOX,
.class = VIDEO_ENHANCEMENT_CLASS,
.instance = 1,
- .mmio_base = GEN11_VEBOX2_RING_BASE,
- .irq_shift = 0, /* not used */
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
+ },
},
};
@@ -223,16 +239,36 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
}
}
+static u32 __engine_mmio_base(struct drm_i915_private *i915,
+ const struct engine_mmio_base *bases)
+{
+ int i;
+
+ for (i = 0; i < MAX_MMIO_BASES; i++)
+ if (INTEL_GEN(i915) >= bases[i].gen)
+ break;
+
+ GEM_BUG_ON(i == MAX_MMIO_BASES);
+ GEM_BUG_ON(!bases[i].base);
+
+ return bases[i].base;
+}
+
+static void __sprint_engine_name(char *name, const struct engine_info *info)
+{
+ WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
+ intel_engine_classes[info->class].name,
+ info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
+}
+
static int
intel_engine_setup(struct drm_i915_private *dev_priv,
enum intel_engine_id id)
{
const struct engine_info *info = &intel_engines[id];
- const struct engine_class_info *class_info;
struct intel_engine_cs *engine;
GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
- class_info = &intel_engine_classes[info->class];
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
@@ -253,35 +289,14 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
engine->id = id;
engine->i915 = dev_priv;
- WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
- class_info->name, info->instance) >=
- sizeof(engine->name));
+ __sprint_engine_name(engine->name, info);
engine->hw_id = engine->guc_id = info->hw_id;
- if (INTEL_GEN(dev_priv) >= 11) {
- switch (engine->id) {
- case VCS:
- engine->mmio_base = GEN11_BSD_RING_BASE;
- break;
- case VCS2:
- engine->mmio_base = GEN11_BSD2_RING_BASE;
- break;
- case VECS:
- engine->mmio_base = GEN11_VEBOX_RING_BASE;
- break;
- default:
- /* take the original value for all other engines */
- engine->mmio_base = info->mmio_base;
- break;
- }
- } else {
- engine->mmio_base = info->mmio_base;
- }
- engine->irq_shift = info->irq_shift;
+ engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
engine->class = info->class;
engine->instance = info->instance;
engine->uabi_id = info->uabi_id;
- engine->uabi_class = class_info->uabi_class;
+ engine->uabi_class = intel_engine_classes[info->class].uabi_class;
engine->context_size = __intel_engine_context_size(dev_priv,
engine->class);
@@ -441,6 +456,11 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine)
engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
}
+static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
+{
+ i915_gem_batch_pool_init(&engine->batch_pool, engine);
+}
+
static bool csb_force_mmio(struct drm_i915_private *i915)
{
/*
@@ -455,6 +475,9 @@ static bool csb_force_mmio(struct drm_i915_private *i915)
if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
return true;
+ if (IS_CANNONLAKE(i915))
+ return true;
+
return false;
}
@@ -485,11 +508,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
intel_engine_init_execlist(engine);
-
intel_engine_init_timeline(engine);
intel_engine_init_hangcheck(engine);
- i915_gem_batch_pool_init(engine, &engine->batch_pool);
-
+ intel_engine_init_batch_pool(engine);
intel_engine_init_cmd_parser(engine);
}
@@ -782,10 +803,24 @@ static inline uint32_t
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
int subslice, i915_reg_t reg)
{
+ uint32_t mcr_slice_subslice_mask;
+ uint32_t mcr_slice_subslice_select;
uint32_t mcr;
uint32_t ret;
enum forcewake_domains fw_domains;
+ if (INTEL_GEN(dev_priv) >= 11) {
+ mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
+ GEN11_MCR_SUBSLICE_MASK;
+ mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
+ GEN11_MCR_SUBSLICE(subslice);
+ } else {
+ mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
+ GEN8_MCR_SUBSLICE_MASK;
+ mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
+ GEN8_MCR_SUBSLICE(subslice);
+ }
+
fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
FW_REG_READ);
fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
@@ -800,14 +835,14 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
* The HW expects the slice and sublice selectors to be reset to 0
* after reading out the registers.
*/
- WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
- mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
- mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+ WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+ mcr &= ~mcr_slice_subslice_mask;
+ mcr |= mcr_slice_subslice_select;
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
ret = I915_READ_FW(reg);
- mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+ mcr &= ~mcr_slice_subslice_mask;
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
@@ -871,640 +906,6 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
}
}
-static int wa_add(struct drm_i915_private *dev_priv,
- i915_reg_t addr,
- const u32 mask, const u32 val)
-{
- const u32 idx = dev_priv->workarounds.count;
-
- if (WARN_ON(idx >= I915_MAX_WA_REGS))
- return -ENOSPC;
-
- dev_priv->workarounds.reg[idx].addr = addr;
- dev_priv->workarounds.reg[idx].value = val;
- dev_priv->workarounds.reg[idx].mask = mask;
-
- dev_priv->workarounds.count++;
-
- return 0;
-}
-
-#define WA_REG(addr, mask, val) do { \
- const int r = wa_add(dev_priv, (addr), (mask), (val)); \
- if (r) \
- return r; \
- } while (0)
-
-#define WA_SET_BIT_MASKED(addr, mask) \
- WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
- WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
- WA_REG(addr, mask, _MASKED_FIELD(mask, value))
-
-static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
- i915_reg_t reg)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct i915_workarounds *wa = &dev_priv->workarounds;
- const uint32_t index = wa->hw_whitelist_count[engine->id];
-
- if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
- return -EINVAL;
-
- I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
- i915_mmio_reg_offset(reg));
- wa->hw_whitelist_count[engine->id]++;
-
- return 0;
-}
-
-static int gen8_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
-
- /* WaDisableAsyncFlipPerfMode:bdw,chv */
- WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
-
- /* WaDisablePartialInstShootdown:bdw,chv */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
- PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
- * workaround for for a possible hang in the unlikely event a TLB
- * invalidation occurs during a PSD flush.
- */
- /* WaForceEnableNonCoherent:bdw,chv */
- /* WaHdcDisableFetchWhenMasked:bdw,chv */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_DONOT_FETCH_MEM_WHEN_MASKED |
- HDC_FORCE_NON_COHERENT);
-
- /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
- * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
- * polygons in the same 8x4 pixel/sample area to be processed without
- * stalling waiting for the earlier ones to write to Hierarchical Z
- * buffer."
- *
- * This optimization is off by default for BDW and CHV; turn it on.
- */
- WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
-
- /* Wa4x4STCOptimizationDisable:bdw,chv */
- WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- WA_SET_FIELD_MASKED(GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK,
- GEN6_WIZ_HASHING_16x4);
-
- return 0;
-}
-
-static int bdw_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen8_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
- /* WaDisableDopClockGating:bdw
- *
- * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
- * to disable EUTC clock gating.
- */
- WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
- DOP_CLOCK_GATING_DISABLE);
-
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN8_SAMPLER_POWER_BYPASS_DIS);
-
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- /* WaForceContextSaveRestoreNonCoherent:bdw */
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
- /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
- (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
- return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen8_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WaDisableThreadStallDopClockGating:chv */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
- /* Improve HiZ throughput on CHV. */
- WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
- return 0;
-}
-
-static int gen9_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
- I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
-
- /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
- I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
- GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
- /* WaDisableKillLogic:bxt,skl,kbl */
- if (!IS_COFFEELAKE(dev_priv))
- I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
- ECOCHK_DIS_TLB);
-
- if (HAS_LLC(dev_priv)) {
- /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
- *
- * Must match Display Engine. See
- * WaCompressedResourceDisplayNewHashMode.
- */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN9_PBE_COMPRESSED_HASH_SELECTION);
- WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
- GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
-
- I915_WRITE(MMCD_MISC_CTRL,
- I915_READ(MMCD_MISC_CTRL) |
- MMCD_PCLA |
- MMCD_HOTSPOT_EN);
- }
-
- /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
- /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
- FLOW_CONTROL_ENABLE |
- PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
- /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
- if (!IS_COFFEELAKE(dev_priv))
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
- /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
- /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
- GEN9_ENABLE_YV12_BUGFIX |
- GEN9_ENABLE_GPGPU_PREEMPTION);
-
- /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
- /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
- GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
-
- /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
- WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
- GEN9_CCS_TLB_PREFETCH_ENABLE);
-
- /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
- HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
-
- /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
- * both tied to WaForceContextSaveRestoreNonCoherent
- * in some hsds for skl. We keep the tie for all gen9. The
- * documentation is a bit hazy and so we want to get common behaviour,
- * even though there is no clear evidence we would need both on kbl/bxt.
- * This area has been source of system hangs so we play it safe
- * and mimic the skl regardless of what bspec says.
- *
- * Use Force Non-Coherent whenever executing a 3D context. This
- * is a workaround for a possible hang in the unlikely event
- * a TLB invalidation occurs during a PSD flush.
- */
-
- /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_NON_COHERENT);
-
- /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
- I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
- BDW_DISABLE_HDC_INVALIDATION);
-
- /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
- if (IS_SKYLAKE(dev_priv) ||
- IS_KABYLAKE(dev_priv) ||
- IS_COFFEELAKE(dev_priv))
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN8_SAMPLER_POWER_BYPASS_DIS);
-
- /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
- /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
- if (IS_GEN9_LP(dev_priv)) {
- u32 val = I915_READ(GEN8_L3SQCREG1);
-
- val &= ~L3_PRIO_CREDITS_MASK;
- val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
- I915_WRITE(GEN8_L3SQCREG1, val);
- }
-
- /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
- I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
- GEN8_LQSC_FLUSH_COHERENT_LINES));
-
- /*
- * Supporting preemption with fine-granularity requires changes in the
- * batch buffer programming. Since we can't break old userspace, we
- * need to set our default preemption level to safe value. Userspace is
- * still able to use more fine-grained preemption levels, since in
- * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
- * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
- * not real HW workarounds, but merely a way to start using preemption
- * while maintaining old contract with userspace.
- */
-
- /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
- WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
- /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
- WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
- GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
- /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
- ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
- if (ret)
- return ret;
-
- /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
- I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
- ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
- if (ret)
- return ret;
-
- /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
- ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- u8 vals[3] = { 0, 0, 0 };
- unsigned int i;
-
- for (i = 0; i < 3; i++) {
- u8 ss;
-
- /*
- * Only consider slices where one, and only one, subslice has 7
- * EUs
- */
- if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
- continue;
-
- /*
- * subslice_7eu[i] != 0 (because of the check above) and
- * ss_max == 4 (maximum number of subslices possible per slice)
- *
- * -> 0 <= ss <= 3;
- */
- ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
- vals[i] = 3 - ss;
- }
-
- if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
- return 0;
-
- /* Tune IZ hashing. See intel_device_info_runtime_init() */
- WA_SET_FIELD_MASKED(GEN7_GT_MODE,
- GEN9_IZ_HASHING_MASK(2) |
- GEN9_IZ_HASHING_MASK(1) |
- GEN9_IZ_HASHING_MASK(0),
- GEN9_IZ_HASHING(2, vals[2]) |
- GEN9_IZ_HASHING(1, vals[1]) |
- GEN9_IZ_HASHING(0, vals[0]));
-
- return 0;
-}
-
-static int skl_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen9_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WaEnableGapsTsvCreditFix:skl */
- I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
- GEN9_GAPS_TSV_CREDIT_DISABLE));
-
- /* WaDisableGafsUnitClkGating:skl */
- I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
- GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
- /* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
- I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
- (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
- /* WaDisableLSQCROPERFforOCL:skl */
- ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
- if (ret)
- return ret;
-
- return skl_tune_iz_hashing(engine);
-}
-
-static int bxt_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen9_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WaDisableThreadStallDopClockGating:bxt */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
- STALL_DOP_GATING_DISABLE);
-
- /* WaDisablePooledEuLoadBalancingFix:bxt */
- I915_WRITE(FF_SLICE_CS_CHICKEN2,
- _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
-
- /* WaToEnableHwFixForPushConstHWBug:bxt */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaInPlaceDecompressionHang:bxt */
- I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
- (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
- return 0;
-}
-
-static int cnl_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
- I915_WRITE(GAMT_CHKN_BIT_REG,
- (I915_READ(GAMT_CHKN_BIT_REG) |
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
-
- /* WaForceContextSaveRestoreNonCoherent:cnl */
- WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
- /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
- /* WaInPlaceDecompressionHang:cnl */
- I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
- (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
- /* WaPushConstantDereferenceHoldDisable:cnl */
- WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
-
- /* FtrEnableFastAnisoL1BankingFix: cnl */
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
-
- /* WaDisable3DMidCmdPreemption:cnl */
- WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
- /* WaDisableGPGPUMidCmdPreemption:cnl */
- WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
- GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
- /* WaEnablePreemptionGranularityControlByUMD:cnl */
- I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
- ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
- if (ret)
- return ret;
-
- /* WaDisableEarlyEOT:cnl */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
-
- return 0;
-}
-
-static int kbl_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen9_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WaEnableGapsTsvCreditFix:kbl */
- I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
- GEN9_GAPS_TSV_CREDIT_DISABLE));
-
- /* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
- I915_WRITE(GAMT_CHKN_BIT_REG,
- (I915_READ(GAMT_CHKN_BIT_REG) |
- GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
-
- /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
- if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FENCE_DEST_SLM_DISABLE);
-
- /* WaToEnableHwFixForPushConstHWBug:kbl */
- if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaDisableGafsUnitClkGating:kbl */
- I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
- GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
- /* WaDisableSbeCacheDispatchPortSharing:kbl */
- WA_SET_BIT_MASKED(
- GEN7_HALF_SLICE_CHICKEN1,
- GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
- /* WaInPlaceDecompressionHang:kbl */
- I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
- (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
- /* WaDisableLSQCROPERFforOCL:kbl */
- ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int glk_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen9_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
- ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
- if (ret)
- return ret;
-
- /* WaToEnableHwFixForPushConstHWBug:glk */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- return 0;
-}
-
-static int cfl_init_workarounds(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- ret = gen9_init_workarounds(engine);
- if (ret)
- return ret;
-
- /* WaEnableGapsTsvCreditFix:cfl */
- I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
- GEN9_GAPS_TSV_CREDIT_DISABLE));
-
- /* WaToEnableHwFixForPushConstHWBug:cfl */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaDisableGafsUnitClkGating:cfl */
- I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
- GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
- /* WaDisableSbeCacheDispatchPortSharing:cfl */
- WA_SET_BIT_MASKED(
- GEN7_HALF_SLICE_CHICKEN1,
- GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
- /* WaInPlaceDecompressionHang:cfl */
- I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
- (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
- return 0;
-}
-
-int init_workarounds_ring(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int err;
-
- if (GEM_WARN_ON(engine->id != RCS))
- return -EINVAL;
-
- dev_priv->workarounds.count = 0;
- dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
-
- if (IS_BROADWELL(dev_priv))
- err = bdw_init_workarounds(engine);
- else if (IS_CHERRYVIEW(dev_priv))
- err = chv_init_workarounds(engine);
- else if (IS_SKYLAKE(dev_priv))
- err = skl_init_workarounds(engine);
- else if (IS_BROXTON(dev_priv))
- err = bxt_init_workarounds(engine);
- else if (IS_KABYLAKE(dev_priv))
- err = kbl_init_workarounds(engine);
- else if (IS_GEMINILAKE(dev_priv))
- err = glk_init_workarounds(engine);
- else if (IS_COFFEELAKE(dev_priv))
- err = cfl_init_workarounds(engine);
- else if (IS_CANNONLAKE(dev_priv))
- err = cnl_init_workarounds(engine);
- else
- err = 0;
- if (err)
- return err;
-
- DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
- engine->name, dev_priv->workarounds.count);
- return 0;
-}
-
-int intel_ring_workarounds_emit(struct i915_request *rq)
-{
- struct i915_workarounds *w = &rq->i915->workarounds;
- u32 *cs;
- int ret, i;
-
- if (w->count == 0)
- return 0;
-
- ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
- if (ret)
- return ret;
-
- cs = intel_ring_begin(rq, w->count * 2 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(w->count);
- for (i = 0; i < w->count; i++) {
- *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
- *cs++ = w->reg[i].value;
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
- if (ret)
- return ret;
-
- return 0;
-}
-
static bool ring_is_idle(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1655,6 +1056,9 @@ void intel_engines_park(struct drm_i915_private *i915)
intel_engine_dump(engine, &p, NULL);
}
+ /* Must be reset upon idling, or we may miss the busy wakeup. */
+ GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN);
+
if (engine->park)
engine->park(engine);
@@ -1713,13 +1117,15 @@ static void print_request(struct drm_printer *m,
struct i915_request *rq,
const char *prefix)
{
+ const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
+
drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix,
rq->global_seqno,
i915_request_completed(rq) ? "!" : "",
rq->fence.context, rq->fence.seqno,
rq->priotree.priority,
jiffies_to_msecs(jiffies - rq->emitted_jiffies),
- rq->timeline->common->name);
+ name);
}
static void hexdump(struct drm_printer *m, const void *buf, size_t len)
@@ -1825,12 +1231,15 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
read = GEN8_CSB_READ_PTR(ptr);
write = GEN8_CSB_WRITE_PTR(ptr);
- drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
+ drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
read, execlists->csb_head,
write,
intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
yesno(test_bit(ENGINE_IRQ_EXECLIST,
- &engine->irq_posted)));
+ &engine->irq_posted)),
+ yesno(test_bit(TASKLET_STATE_SCHED,
+ &engine->execlists.tasklet.state)),
+ enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
if (read >= GEN8_CSB_ENTRIES)
read = 0;
if (write >= GEN8_CSB_ENTRIES)
@@ -1929,12 +1338,16 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->head, rq->postfix, rq->tail,
rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
- drm_printf(m, "\t\tring->start: 0x%08x\n",
+ drm_printf(m, "\t\tring->start: 0x%08x\n",
i915_ggtt_offset(rq->ring->vma));
- drm_printf(m, "\t\tring->head: 0x%08x\n",
+ drm_printf(m, "\t\tring->head: 0x%08x\n",
rq->ring->head);
- drm_printf(m, "\t\tring->tail: 0x%08x\n",
+ drm_printf(m, "\t\tring->tail: 0x%08x\n",
rq->ring->tail);
+ drm_printf(m, "\t\tring->emit: 0x%08x\n",
+ rq->ring->emit);
+ drm_printf(m, "\t\tring->space: 0x%08x\n",
+ rq->ring->space);
}
rcu_read_unlock();
@@ -2109,4 +1522,5 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
+#include "selftests/intel_engine_cs.c"
#endif