summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_workarounds.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2023-08-07 13:49:24 +1000
committerDave Airlie <airlied@redhat.com>2023-08-07 13:49:25 +1000
commitd9aa1da9a8cfb0387eb5703c15bd1f54421460ac (patch)
treefd64841d76f3408ed5d25f4d9aaa5088a752e2fd /drivers/gpu/drm/i915/gt/intel_workarounds.c
parent7c9aa0f7463eede3226daf06ff7ccbb813f8b739 (diff)
parent28e671114fb0f28f334fac8d0a6b9c395c7b0498 (diff)
Merge tag 'drm-intel-gt-next-2023-08-04' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes: - Avoid infinite GPU waits by avoidin premature release of request's reusable memory (Chris, Janusz) - Expose RPS thresholds in sysfs (Tvrtko) - Apply GuC SLPC min frequency softlimit correctly (Vinay) - Restore SLPC efficient freq earlier (Vinay) - Consider OA buffer boundary when zeroing out reports (Umesh) - Extend Wa_14015795083 to TGL, RKL, DG1 and ADL (Matt R) - Fix context workarounds with non-masked regs on MTL/DG2 (Lucas) - Enable the CCS_FLUSH bit in the pipe control and in the CS for MTL+ (Andi) - Update MTL workarounds 14018778641, 22016122933 (Tejas, Zhanjun) - Ensure memory quiesced before AUX CCS invalidation (Jonathan) - Add a gsc_info debugfs (Daniele) - Invalidate the TLBs on each GT on multi-GT device (Chris) - Fix a VMA UAF for multi-gt platform (Nirmoy) - Do not use stolen on MTL due to HW bug (Nirmoy) - Check HuC and GuC version compatibility on MTL (Daniele) - Dump perf_limit_reasons for slow GuC init debug (Vinay) - Replace kmap() with kmap_local_page() (Sumitra, Ira) - Add sentinel to xehp_oa_b_counters for KASAN (Andrzej) - Add the gen12_needs_ccs_aux_inv helper (Andi) - Fixes and updates for GSC memory allocation (Daniele) - Fix one wrong caching mode enum usage (Tvrtko) - Fixes for GSC wakeref (Alan) - Static checker fixes (Harshit, Arnd, Dan, Cristophe, David, Andi) - Rename flags with bit_group_X according to the datasheet (Andi) - Use direct alias for i915 in requests (Andrzej) - Replace i915->gt0 with to_gt(i915) (Andi) - Use the i915_vma_flush_writes helper (Tvrtko) - Selftest improvements (Alan) - Remove dead code (Tvrtko) Signed-off-by: Dave Airlie <airlied@redhat.com> # Conflicts: # drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZMy6kDd9npweR4uy@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_workarounds.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c148
1 files changed, 80 insertions, 68 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4d2dece96011..589d009032fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -123,6 +123,22 @@ static void wa_init_finish(struct i915_wa_list *wal)
wal->wa_count, wal->name, wal->engine_name);
}
+static enum forcewake_domains
+wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+ enum forcewake_domains fw = 0;
+ struct i915_wa *wa;
+ unsigned int i;
+
+ for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+ fw |= intel_uncore_forcewake_for_reg(uncore,
+ wa->reg,
+ FW_REG_READ |
+ FW_REG_WRITE);
+
+ return fw;
+}
+
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
unsigned int addr = i915_mmio_reg_offset(wa->reg);
@@ -225,13 +241,13 @@ static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
- wa_add(wal, reg, clear, set, clear, false);
+ wa_add(wal, reg, clear, set, clear | set, false);
}
static void
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
{
- wa_mcr_add(wal, reg, clear, set, clear, false);
+ wa_mcr_add(wal, reg, clear, set, clear | set, false);
}
static void
@@ -621,10 +637,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
/* Wa_1406697149 (WaDisableBankHangMode:icl) */
- wa_write(wal,
- GEN8_L3CNTLREG,
- intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
- GEN8_ERRDETBCTRL);
+ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);
/* WaForceEnableNonCoherent:icl
* This is not the same workaround as in early Gen9 platforms, where
@@ -653,7 +666,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_1604278689:icl,ehl */
wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
- 0, /* write-only register; skip validation */
+ 0,
0xFFFFFFFF);
/* Wa_1406306137:icl,ehl */
@@ -670,38 +683,8 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
- wa_mcr_add(wal,
- XEHP_FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
-}
-
-/*
- * These settings aren't actually workarounds, but general tuning settings that
- * need to be programmed on several platforms.
- */
-static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
-{
- /*
- * Although some platforms refer to it as Wa_1604555607, we need to
- * program it even on those that don't explicitly list that
- * workaround.
- *
- * Note that the programming of this register is further modified
- * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
- * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
- * value when read. The default value for this register is zero for all
- * fields and there are no bit masks. So instead of doing a RMW we
- * should just write TDS timer value. For the same reason read
- * verification is ignored.
- */
- wa_add(wal,
- GEN12_FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
+ wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128);
}
static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -709,8 +692,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
{
struct drm_i915_private *i915 = engine->i915;
- gen12_ctx_gt_tuning_init(engine, wal);
-
/*
* Wa_1409142259:tgl,dg1,adl-p
* Wa_1409347922:tgl,dg1,adl-p
@@ -732,15 +713,27 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
/*
- * Wa_16011163337
+ * Wa_16011163337 - GS_TIMER
+ *
+ * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
+ * need to program it even on those that don't explicitly list that
+ * workaround.
+ *
+ * Note that the programming of GEN12_FF_MODE2 is further modified
+ * according to the FF_MODE2 guidance given by Wa_1608008084.
+ * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
+ * value when read from the CPU.
*
- * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
- * to Wa_1608008084.
+ * The default value for this register is zero for all fields.
+ * So instead of doing a RMW we should just write the desired values
+ * for TDS and GS timers. Note that since the readback can't be trusted,
+ * the clear mask is just set to ~0 to make sure other bits are not
+ * inadvertently set. For the same reason read verification is ignored.
*/
wa_add(wal,
GEN12_FF_MODE2,
- FF_MODE2_GS_TIMER_MASK,
- FF_MODE2_GS_TIMER_224,
+ ~0,
+ FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
0, false);
if (!IS_DG1(i915)) {
@@ -987,6 +980,9 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
+ struct intel_uncore *uncore = rq->engine->uncore;
+ enum forcewake_domains fw;
+ unsigned long flags;
struct i915_wa *wa;
unsigned int i;
u32 *cs;
@@ -1003,13 +999,36 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
+ fw = wal_get_fw_for_rmw(uncore, wal);
+
+ intel_gt_mcr_lock(wal->gt, &flags);
+ spin_lock(&uncore->lock);
+ intel_uncore_forcewake_get__locked(uncore, fw);
+
*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+ u32 val;
+
+ /* Skip reading the register if it's not really needed */
+ if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
+ val = wa->set;
+ } else {
+ val = wa->is_mcr ?
+ intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
+ intel_uncore_read_fw(uncore, wa->reg);
+ val &= ~wa->clr;
+ val |= wa->set;
+ }
+
*cs++ = i915_mmio_reg_offset(wa->reg);
- *cs++ = wa->set;
+ *cs++ = val;
}
*cs++ = MI_NOOP;
+ intel_uncore_forcewake_put__locked(uncore, fw);
+ spin_unlock(&uncore->lock);
+ intel_gt_mcr_unlock(wal->gt, flags);
+
intel_ring_advance(rq, cs);
ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
@@ -1485,6 +1504,18 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+
+ /*
+ * Wa_14015795083
+ *
+ * Firmware on some gen12 platforms locks the MISCCPCTL register,
+ * preventing i915 from modifying it for this workaround. Skip the
+ * readback verification for this workaround on debug builds; if the
+ * workaround doesn't stick due to firmware behavior, it's not an error
+ * that we want CI to flag.
+ */
+ wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
+ 0, 0, false);
}
static void
@@ -1710,7 +1741,6 @@ static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Wa_14018778641 / Wa_18018781329 */
- wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
@@ -1743,8 +1773,6 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
* GT, the media GT's versions are regular singleton registers.
*/
wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
debug_dump_steering(gt);
}
@@ -1850,22 +1878,6 @@ void intel_gt_init_workarounds(struct intel_gt *gt)
wa_init_finish(wal);
}
-static enum forcewake_domains
-wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
-{
- enum forcewake_domains fw = 0;
- struct i915_wa *wa;
- unsigned int i;
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- fw |= intel_uncore_forcewake_for_reg(uncore,
- wa->reg,
- FW_REG_READ |
- FW_REG_WRITE);
-
- return fw;
-}
-
static bool
wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
const char *name, const char *from)
@@ -3237,7 +3249,7 @@ wa_list_srm(struct i915_request *rq,
const struct i915_wa_list *wal,
struct i915_vma *vma)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
unsigned int i, count = 0;
const struct i915_wa *wa;
u32 srm, *cs;
@@ -3336,7 +3348,7 @@ retry:
err = 0;
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
+ if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
continue;
if (!wa_verify(wal->gt, wa, results[i], wal->name, from))