From ce7e75c7ef1bf8ea3d947da8c674d2f40fd7d734 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 28 Jul 2021 12:21:00 -0700 Subject: drm/i915: Disable bonding on gen12+ platforms Disable bonding on gen12+ platforms aside from ones already supported by the i915 - TGL, RKL, and ADL-S. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Acked-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728192100.132425-1-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..dbaeb924a437 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -442,6 +442,13 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) u16 idx, num_bonds; int err, n; + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) && + !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) { + drm_dbg(&i915->drm, + "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n"); + return -ENODEV; + } + if (get_user(idx, &ext->virtual_index)) return -EFAULT; -- cgit v1.2.3-70-g09d2 From bc33e71f00a7491810cac9e1335ca97e889d5620 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:13 -0700 Subject: drm/i915: correct name of GT forcewake domain in error messages For historical reasons, the GT forcewake domain used to be referred to as the "blitter" domain; that name is no longer accurate since the GT domain contains a lot of additional registers and functionality besides just the blitter. Although we renamed the domain in the driver in commit 55e3c170950f ("drm/i915: Rename FORCEWAKE_BLITTER to FORCEWAKE_GT"), we neglected to update the string that gets printed in driver error messages; let's do that now to avoid confusion. Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0c35acfcd6da..13d069823635 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -64,7 +64,7 @@ static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) static const char * const forcewake_domain_names[] = { "render", - "blitter", + "gt", "media", "vdbox0", "vdbox1", -- cgit v1.2.3-70-g09d2 From 39afa4104bedf214e5779ef20655665723ad48cd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:14 -0700 Subject: drm/i915: Re-use gen11 forcewake read functions on gen12 The forcewake read logic is identical between gen11 and gen12, only the forcewake table data (which is tracked separately) differs; there's no need to generate a separate set of gen12 read functions when the gen11 functions will work just as well. We'll keep the separate write functions for now since the generated code directly references different shadow tables between the two platforms. Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 13d069823635..9ed7ce71e520 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -945,9 +945,6 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ find_fw_domain(uncore, offset) -#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ - find_fw_domain(uncore, offset) - /* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const i915_reg_t gen8_shadowed_regs[] = { RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ @@ -1644,7 +1641,6 @@ __gen_read(func, 16) \ __gen_read(func, 32) \ __gen_read(func, 64) -__gen_reg_read_funcs(gen12_fwtable); __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); __gen_reg_read_funcs(gen6); @@ -2122,7 +2118,7 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); -- cgit v1.2.3-70-g09d2 From f9d56cd64ef3186d6ce072751f7f44dcd189f6bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 08:21:58 -0700 Subject: drm/i915: Make shadow tables range-based Rather than defining our shadow tables as a list of individual registers, provide them as a list of register ranges; we'll have some ranges of multiple registers being added soon (and we already have a couple adjacent registers that we can squash into a single range now). This change also defines the table with hex literal values rather than symbolic register names; since that's how the tables are defined in the bspec, this change will make it easier to review the tables overall. v2: - Force signed comparison on range overlap sanity check Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729152158.2646246-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 +-- drivers/gpu/drm/i915/intel_uncore.c | 160 +++++++++++++------------- drivers/gpu/drm/i915/intel_uncore.h | 6 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 32 ++++-- 4 files changed, 108 insertions(+), 103 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aae609d7d85d..e9b8af9f08ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2067,12 +2067,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->gt, &engine->wa_list); } -struct mcr_range { - u32 start; - u32 end; -}; - -static const struct mcr_range mcr_ranges_gen8[] = { +static const struct i915_range mcr_ranges_gen8[] = { { .start = 0x5500, .end = 0x55ff }, { .start = 0x7000, .end = 0x7fff }, { .start = 0x9400, .end = 0x97ff }, @@ -2081,7 +2076,7 @@ static const struct mcr_range mcr_ranges_gen8[] = { {}, }; -static const struct mcr_range mcr_ranges_gen12[] = { +static const struct i915_range mcr_ranges_gen12[] = { { .start = 0x8150, .end = 0x815f }, { .start = 0x9520, .end = 0x955f }, { .start = 0xb100, .end = 0xb3ff }, @@ -2090,7 +2085,7 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; -static const struct mcr_range mcr_ranges_xehp[] = { +static const struct i915_range mcr_ranges_xehp[] = { { .start = 0x4000, .end = 0x4aff }, { .start = 0x5200, .end = 0x52ff }, { .start = 0x5400, .end = 0x7fff }, @@ -2109,7 +2104,7 @@ static const struct mcr_range mcr_ranges_xehp[] = { static bool mcr_range(struct drm_i915_private *i915, u32 offset) { - const struct mcr_range *mcr_ranges; + const struct i915_range *mcr_ranges; int i; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9ed7ce71e520..52601b960248 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -946,101 +946,95 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { find_fw_domain(uncore, offset) /* *Must* be sorted by offset! See intel_shadow_table_check(). */ -static const i915_reg_t gen8_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(GEN6_BSD_RING_BASE), /* 0x12000 (base) */ - RING_TAIL(VEBOX_RING_BASE), /* 0x1a000 (base) */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ +static const struct i915_range gen8_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x12030, .end = 0x12030 }, + { .start = 0x1a030, .end = 0x1a030 }, + { .start = 0x22030, .end = 0x22030 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ +static const struct i915_range gen11_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ +static const struct i915_range gen12_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t xehp_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ - RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ - RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ - RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ - RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ - RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ - RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ +static const struct i915_range xehp_shadowed_regs[] = { + { .start = 0x2000, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, + { .start = 0x1E0030, .end = 0x1E0030 }, + { .start = 0x1E0550, .end = 0x1E0550 }, + { .start = 0x1E4030, .end = 0x1E4030 }, + { .start = 0x1E4550, .end = 0x1E4550 }, + { .start = 0x1E8030, .end = 0x1E8030 }, + { .start = 0x1E8550, .end = 0x1E8550 }, + { .start = 0x1F0030, .end = 0x1F0030 }, + { .start = 0x1F0550, .end = 0x1F0550 }, + { .start = 0x1F4030, .end = 0x1F4030 }, + { .start = 0x1F4550, .end = 0x1F4550 }, + { .start = 0x1F8030, .end = 0x1F8030 }, + { .start = 0x1F8550, .end = 0x1F8550 }, /* TODO: Other registers are not yet used */ }; -static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) +static int mmio_range_cmp(u32 key, const struct i915_range *range) { - u32 offset = i915_mmio_reg_offset(*reg); - - if (key < offset) + if (key < range->start) return -1; - else if (key > offset) + else if (key > range->end) return 1; else return 0; @@ -1049,9 +1043,9 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) #define __is_X_shadowed(x) \ static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = x##_shadowed_regs; \ + const struct i915_range *regs = x##_shadowed_regs; \ return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ - mmio_reg_cmp); \ + mmio_range_cmp); \ } __is_X_shadowed(gen8) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 3c0b0a8b5250..531665b08039 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -119,6 +119,12 @@ struct intel_forcewake_range { enum forcewake_domains domains; }; +/* Other register ranges (e.g., shadow tables, MCR tables, etc.) */ +struct i915_range { + u32 start; + u32 end; +}; + struct intel_uncore { void __iomem *regs; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 720b60853f8b..d6a9c11afa23 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -62,30 +62,40 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { struct { - const i915_reg_t *regs; + const struct i915_range *regs; unsigned int size; - } reg_lists[] = { + } range_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; - const i915_reg_t *reg; + const struct i915_range *range; unsigned int i, j; s32 prev; - for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) { - reg = reg_lists[j].regs; - for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) { - u32 offset = i915_mmio_reg_offset(*reg); + for (j = 0; j < ARRAY_SIZE(range_lists); ++j) { + range = range_lists[j].regs; + for (i = 0, prev = -1; i < range_lists[j].size; i++, range++) { + if (range->end < range->start) { + pr_err("%s: range[%d]:(%06x-%06x) has end before start\n", + __func__, i, range->start, range->end); + return -EINVAL; + } + + if (prev >= (s32)range->start) { + pr_err("%s: range[%d]:(%06x-%06x) is before end of previous (%06x)\n", + __func__, i, range->start, range->end, prev); + return -EINVAL; + } - if (prev >= (s32)offset) { - pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", - __func__, i, offset, prev); + if (range->start % 4) { + pr_err("%s: range[%d]:(%06x-%06x) has non-dword-aligned start\n", + __func__, i, range->start, range->end); return -EINVAL; } - prev = offset; + prev = range->end; } } -- cgit v1.2.3-70-g09d2 From 0bb50de156d8280e53884adf1d5a04d6108f90e7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:16 -0700 Subject: drm/i915/gen11: Update shadowed register table The bspec lists many shadowed registers (i.e., registers for which we don't need to grab forcewake when writing) that we weren't tracking in the driver. Although we may not actually use all of these registers right now, it's best to just match the bspec list exactly. Note that the bspec also lists registers that are shadowed for various HW-internal accesses; we can ignore those and just list the ones that are shadowed for accesses from the IA/CPU. Bspec: 18333 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 52601b960248..6b5b029148b2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -960,20 +960,26 @@ static const struct i915_range gen11_shadowed_regs[] = { { .start = 0x2550, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, + { .start = 0x22230, .end = 0x22230 }, + { .start = 0x22510, .end = 0x22550 }, { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C0230, .end = 0x1C0230 }, + { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C4230, .end = 0x1C4230 }, + { .start = 0x1C4510, .end = 0x1C4550 }, { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1C8230, .end = 0x1C8230 }, + { .start = 0x1C8510, .end = 0x1C8550 }, { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D0230, .end = 0x1D0230 }, + { .start = 0x1D0510, .end = 0x1D0550 }, { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D4230, .end = 0x1D4230 }, + { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1D8230, .end = 0x1D8230 }, + { .start = 0x1D8510, .end = 0x1D8550 }, }; static const struct i915_range gen12_shadowed_regs[] = { -- cgit v1.2.3-70-g09d2 From 5798a769d6f5be656638c5e6e0cd5c4f155a2fb5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:17 -0700 Subject: drm/i915/gen12: Update shadowed register table The bspec lists many shadowed registers (i.e., registers for which we don't need to grab forcewake when writing) that we weren't tracking in the driver. Although we may not actually use all of these registers right now, it's best to just match the bspec list exactly. Note that the bspec also lists registers that are shadowed for various HW-internal accesses; we can ignore those and just list the ones that are shadowed for accesses from the IA/CPU. Bspec: 52077 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6b5b029148b2..0b2dbcc14802 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -984,23 +984,28 @@ static const struct i915_range gen11_shadowed_regs[] = { static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, - { .start = 0x2550, .end = 0x2550 }, + { .start = 0x2510, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4D4, .end = 0xC4D4 }, + { .start = 0xC600, .end = 0xC600 }, { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, + { .start = 0x22510, .end = 0x22550 }, { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C4510, .end = 0x1C4550 }, { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1C8510, .end = 0x1C8550 }, { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D0510, .end = 0x1D0550 }, { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1D8510, .end = 0x1D8550 }, }; static const struct i915_range xehp_shadowed_regs[] = { -- cgit v1.2.3-70-g09d2 From 5c5c40e28c52a36bb5ac26817275d5a0281ab819 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:18 -0700 Subject: drm/i915/xehp: Xe_HP shadowed registers are a strict superset of gen12 The list of shadowed registers on XeHP is identical to the set for earlier gen12 platforms, with additional ranges added for the new VCS and VECS engines. Since those register ranges were reserved on earlier gen12 platforms, it's safe to consolidate to a single gen12 table rather than tracking Xe_HP separately. Bspec: 52077 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++-------------------- drivers/gpu/drm/i915/selftests/intel_uncore.c | 1 - 2 files changed, 13 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0b2dbcc14802..de4ef9bd3b51 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1006,39 +1006,24 @@ static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, { .start = 0x1D8510, .end = 0x1D8550 }, -}; -static const struct i915_range xehp_shadowed_regs[] = { - { .start = 0x2000, .end = 0x2030 }, - { .start = 0x2550, .end = 0x2550 }, - { .start = 0xA008, .end = 0xA00C }, - { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, - { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, - { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, - { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, - { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, - { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, - { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, + /* + * The rest of these ranges are specific to Xe_HP and beyond, but + * are reserved/unused ranges on earlier gen12 platforms, so they can + * be safely added to the gen12 table. + */ { .start = 0x1E0030, .end = 0x1E0030 }, - { .start = 0x1E0550, .end = 0x1E0550 }, + { .start = 0x1E0510, .end = 0x1E0550 }, { .start = 0x1E4030, .end = 0x1E4030 }, - { .start = 0x1E4550, .end = 0x1E4550 }, + { .start = 0x1E4510, .end = 0x1E4550 }, { .start = 0x1E8030, .end = 0x1E8030 }, - { .start = 0x1E8550, .end = 0x1E8550 }, + { .start = 0x1E8510, .end = 0x1E8550 }, { .start = 0x1F0030, .end = 0x1F0030 }, - { .start = 0x1F0550, .end = 0x1F0550 }, + { .start = 0x1F0510, .end = 0x1F0550 }, { .start = 0x1F4030, .end = 0x1F4030 }, - { .start = 0x1F4550, .end = 0x1F4550 }, + { .start = 0x1F4510, .end = 0x1F4550 }, { .start = 0x1F8030, .end = 0x1F8030 }, - { .start = 0x1F8550, .end = 0x1F8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1F8510, .end = 0x1F8550 }, }; static int mmio_range_cmp(u32 key, const struct i915_range *range) @@ -1062,7 +1047,6 @@ static bool is_##x##_shadowed(u32 offset) \ __is_X_shadowed(gen8) __is_X_shadowed(gen11) __is_X_shadowed(gen12) -__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1126,15 +1110,6 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) -#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - const u32 __offset = (offset); \ - if (!is_xehp_shadowed(__offset)) \ - __fwd = find_fw_domain(uncore, __offset); \ - __fwd; \ -}) - /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1737,7 +1712,6 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) -__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -2114,11 +2088,11 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index d6a9c11afa23..22ef2c87df1a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,7 +68,6 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, - { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; const struct i915_range *range; unsigned int i, j; -- cgit v1.2.3-70-g09d2 From fa9899dad3ed84a8b6433467670d4cacd9b873bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:40 -0700 Subject: drm/i915/xehp: Loop over all gslices for INSTDONE processing We no longer have traditional slices on Xe_HP platforms, but the INSTDONE registers are replicated according to gslice representation which is similar. We can mostly re-use the existing instdone code with just a few modifications: * Create an alternate instdone loop macro that will iterate over the flat DSS space, but still provide the gslice/dss steering values for compatibility with the legacy code. * We should allocate INSTDONE storage space according to the maximum number of gslices rather than the maximum number of legacy slices to ensure we have enough storage space to hold all of the values. XeHP design has 8 gslices, whereas older platforms never had more than 3 slices. Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 48 +++++++++++++++------------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 +++++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 7 ++++ drivers/gpu/drm/i915/i915_gpu_error.c | 32 +++++++++++++------ 4 files changed, 66 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0d9105a31d84..58ed67894b3d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1163,16 +1163,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, u32 mmio_base = engine->mmio_base; int slice; int subslice; + int iter; memset(instdone, 0, sizeof(*instdone)); - switch (GRAPHICS_VER(i915)) { - default: + if (GRAPHICS_VER(i915) >= 8) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1182,21 +1182,32 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common_extra[1] = intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); - instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } + } else { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } } - break; - case 7: + } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1204,22 +1215,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); instdone->row[0][0] = intel_uncore_read(uncore, GEN7_ROW_INSTDONE); - - break; - case 6: - case 5: - case 4: + } else if (GRAPHICS_VER(i915) >= 4) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = intel_uncore_read(uncore, GEN4_INSTDONE1); - break; - case 3: - case 2: + } else { instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); - break; } } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ed91bcff20eb..0b4846b01626 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -67,8 +67,8 @@ struct intel_instdone { /* The following exist only in the RCS engine */ u32 slice_common; u32 slice_common_extra[2]; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* @@ -578,4 +578,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ (instdone_has_subslice(dev_priv_, sseu_, slice_, \ subslice_))) + +#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ + for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ + (iter_) < GEN_MAX_SUBSLICES; \ + (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ + (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ + for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) + #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 22fef98887c0..0270acdcc157 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -26,6 +26,9 @@ struct drm_printer; #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 +#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE) + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; @@ -78,6 +81,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, u8 mask; int ss_idx = subslice / BITS_PER_BYTE; + if (slice >= sseu->max_slices || + subslice >= sseu->max_subslices) + return false; + GEM_BUG_ON(ss_idx >= sseu->ss_stride); mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0f08bcfbe964..8230bc3ac8a9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -444,15 +444,29 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.sampler[slice][subslice]); - - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.row[slice][subslice]); + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { + int iter; + + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } else { + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } if (GRAPHICS_VER(m->i915) < 12) return; -- cgit v1.2.3-70-g09d2 From 89f2e7ab4dd93d8785619ce58838391b9b07feb7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:41 -0700 Subject: drm/i915/dg2: Report INSTDONE_GEOM values in error state Xe_HPG adds some additional INSTDONE_GEOM debug registers; the Mesa team has indicated that having these reported in the error state would be useful for debugging GPU hangs. These registers are replicated per-DSS with gslice steering. Cc: Lionel Landwerlin Signed-off-by: Matt Roper Acked-by: Lionel Landwerlin Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 58ed67894b3d..332efea696a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1202,6 +1202,13 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, GEN7_ROW_INSTDONE); } } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + instdone->geom_svg[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + XEHPG_INSTDONE_GEOM_SVG); + } } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 0b4846b01626..bfbfe53c23dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -69,6 +69,9 @@ struct intel_instdone { u32 slice_common_extra[2]; u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + + /* Added in XeHPG */ + u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8230bc3ac8a9..91d5da7b0a2b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -431,6 +431,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; int slice; int subslice; + int iter; err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); @@ -445,8 +446,6 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, return; if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { - int iter; - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, @@ -471,6 +470,13 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) < 12) return; + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " GEOM_SVGUNIT_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.geom_svg[slice][subslice]); + } + err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n", ee->instdone.slice_common_extra[0]); err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2113925084b0..9884c1156b95 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2695,6 +2695,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) -- cgit v1.2.3-70-g09d2 From d16de9a25b5cc458d0c8c978970f8edf9cf710d0 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 6 Aug 2021 10:29:01 -0700 Subject: drm/i915/xehpsdv: Add compute DSS type Starting in XeHP, the concept of slice has been removed in favor of DSS (Dual-Subslice) masks for various workload types. These workloads have been divided into those enabled for geometry and those enabled for compute. i915 currently maintains a single set of S/SS/EU masks for the device. The goal of this patch set is to minimize the amount of impact to prior generations while still giving the user maximum flexibility. v2: - Generalize a comment about uapi access to geometry/compute masks; the proposed uapi has changed since the comment was first written, and will show up in a future series once the userspace code is published. (Lucas) v3: - Eliminate unnecessary has_compute_dss flag. (Lucas) - Drop unwanted comment change in uapi header. (Lucas) Bspec: 33117, 33118, 20376 Cc: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Stuart Summers Signed-off-by: Steve Hampson Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210806172901.1049133-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 60 +++++++++++++++++++++++++++--------- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 ++- drivers/gpu/drm/i915/i915_reg.h | 3 +- 3 files changed, 50 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index bbd272943c3f..b0e09b58005e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask) + u8 *subslice_mask, u32 ss_mask) { int offset = slice * sseu->ss_stride; - memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); + memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); } unsigned int @@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, - u8 s_en, u32 ss_en, u16 eu_en) +static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) +{ + u32 ss_mask; + + ss_mask = ss_en >> (s * sseu->max_subslices); + ss_mask &= GENMASK(sseu->max_subslices - 1, 0); + + return ss_mask; +} + +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, + u32 g_ss_en, u32 c_ss_en, u16 eu_en) { int s, ss; - /* ss_en represents entire subslice mask across all slices */ + /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > - sizeof(ss_en) * BITS_PER_BYTE); + sizeof(g_ss_en) * BITS_PER_BYTE); for (s = 0; s < sseu->max_slices; s++) { if ((s_en & BIT(s)) == 0) @@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, sseu->slice_mask |= BIT(s); - intel_sseu_set_subslices(sseu, s, ss_en); + /* + * XeHP introduces the concept of compute vs geometry DSS. To + * reduce variation between GENs around subslice usage, store a + * mask for both the geometry and compute enabled masks since + * userspace will need to be able to query these masks + * independently. Also compute a total enabled subslice count + * for the purposes of selecting subslices to use in a + * particular GEM context. + */ + intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, + get_ss_stride_mask(sseu, s, c_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, + get_ss_stride_mask(sseu, s, g_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + get_ss_stride_mask(sseu, s, + g_ss_en | c_ss_en)); for (ss = 0; ss < sseu->max_subslices; ss++) if (intel_sseu_has_subslice(sseu, s, ss)) @@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; - u32 dss_en; + u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; u8 s_en; @@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt) s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; - dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) @@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) sseu_set_eus(sseu, 0, 1, ~disabled_mask); } - intel_sseu_set_subslices(sseu, 0, subslice_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); sseu->eu_total = compute_eu_total(sseu); @@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -506,7 +535,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 0270acdcc157..60882a74741e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -32,6 +32,8 @@ struct drm_printer; struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; @@ -104,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask); + u8 *subslice_mask, u32 ss_mask); void intel_sseu_info_init(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9884c1156b95..c8db6e8ef1ad 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3160,7 +3160,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) -#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHP_EU_ENABLE _MMIO(0x9134) #define XEHP_EU_ENA_MASK 0xFF -- cgit v1.2.3-70-g09d2 From b97090575ed27f8a23cc8f8ace642d5a8ea59206 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Aug 2021 15:05:23 +0200 Subject: drm/i915: Use locked access to ctx->engines in set_priority This essentially reverts commit 89ff76bf9b3b0b86e6bbe344bd6378d8661303fc Author: Chris Wilson Date: Thu Apr 2 13:42:18 2020 +0100 drm/i915/gem: Utilize rcu iteration of context engines Note that the other use of __context_engines_await have disappeard in the following commits: ccbc1b97948a ("drm/i915/gem: Don't allow changing the VM on running contexts (v4)") c7a71fc8ee04 ("drm/i915: Drop getparam support for I915_CONTEXT_PARAM_ENGINES") 4a766ae40ec8 ("drm/i915: Drop the CONTEXT_CLONE API (v2)") None of these have any business to optimize their engine lookup with rcu, unless extremely convincing benchmark data and a solid analysis why we can't make that workload (whatever it is that does) faster with a proper design fix. Also since there's only one caller of context_apply_all left and it's really just a loop, inline it and then inline the lopp body too. This is how all other callers that take the engine lock loop over engines, it's much simpler. Reviewed-by: Jason Ekstrand Signed-off-by: Daniel Vetter Cc: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Jason Ekstrand Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20210810130523.1972031-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 72 ++++++----------------------- 1 file changed, 14 insertions(+), 58 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dbaeb924a437..fd169cf2f75a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1284,49 +1284,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx, - bool *user_engines) -{ - struct i915_gem_engines *engines; - - rcu_read_lock(); - do { - engines = rcu_dereference(ctx->engines); - GEM_BUG_ON(!engines); - - if (user_engines) - *user_engines = i915_gem_context_user_engines(ctx); - - /* successful await => strong mb */ - if (unlikely(!i915_sw_fence_await(&engines->fence))) - continue; - - if (likely(engines == rcu_access_pointer(ctx->engines))) - break; - - i915_sw_fence_complete(&engines->fence); - } while (1); - rcu_read_unlock(); - - return engines; -} - -static void -context_apply_all(struct i915_gem_context *ctx, - void (*fn)(struct intel_context *ce, void *data), - void *data) -{ - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct intel_context *ce; - - e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) - fn(ce, data); - i915_sw_fence_complete(&e->fence); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, const struct i915_gem_proto_context *pc) @@ -1776,23 +1733,11 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static void __apply_priority(struct intel_context *ce, void *arg) -{ - struct i915_gem_context *ctx = arg; - - if (!intel_engine_has_timeslices(ce->engine)) - return; - - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) - intel_context_set_use_semaphores(ce); - else - intel_context_clear_use_semaphores(ce); -} - static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { + struct i915_gem_engines_iter it; + struct intel_context *ce; int err; err = validate_priority(ctx->i915, args); @@ -1800,7 +1745,18 @@ static int set_priority(struct i915_gem_context *ctx, return err; ctx->sched.priority = args->value; - context_apply_all(ctx, __apply_priority, ctx); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_has_timeslices(ce->engine)) + continue; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); + } + i915_gem_context_unlock_engines(ctx); return 0; } -- cgit v1.2.3-70-g09d2 From efd330b97855013c8b58185683ddfb75deab5fa9 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 5 Aug 2021 09:36:43 -0700 Subject: drm/i915/xehpsdv: factor out function to read RP_STATE_CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of maintaining the same if ladder in 3 different places, add a function to read RP_STATE_CAP. Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 8 +++----- drivers/gpu/drm/i915/gt/intel_rps.c | 17 ++++++++++++----- drivers/gpu/drm/i915/gt/intel_rps.h | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 8 +++----- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index d6f5836396f8..f6733f279890 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -309,13 +309,11 @@ static int frequency_show(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(i915)) { - rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(i915)) gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index d812b27835f8..a3e69eba376f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -996,20 +996,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val) static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = intel_rps_read_state_cap(rps); /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(i915)) { - u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { - u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 16) & 0xff; @@ -2140,6 +2136,17 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } +u32 intel_rps_read_state_cap(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + if (IS_GEN9_LP(i915)) + return intel_uncore_read(uncore, BXT_RP_STATE_CAP); + else + return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4213bcce1667..11960d64ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); +u32 intel_rps_read_state_cap(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cc745751ac53..6c83da3956b9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -420,13 +420,11 @@ static int i915_frequency_info(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(dev_priv)) { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(dev_priv)) gt_perf_status = intel_uncore_read(&dev_priv->uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(&dev_priv->uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); -- cgit v1.2.3-70-g09d2 From ad482232e3cc6d65eaeb19ce2412887458b19559 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:44 -0700 Subject: drm/i915/xehpsdv: Read correct RP_STATE_CAP register The RP_STATE_CAP register is no longer part of the MCHBAR on XEHPSDV; this register is now a per-tile register at GTTMMADDR offset 0x250014. Cc: Rodrigo Vivi Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index a3e69eba376f..3489f5f0cac1 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2141,7 +2141,9 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps) struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); - if (IS_GEN9_LP(i915)) + if (IS_XEHPSDV(i915)) + return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); + else if (IS_GEN9_LP(i915)) return intel_uncore_read(uncore, BXT_RP_STATE_CAP); else return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8db6e8ef1ad..f79f02ee12db 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4124,6 +4124,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) +#define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) /* * Logical Context regs -- cgit v1.2.3-70-g09d2 From d5ef86b38e4c2a65d5c1d64d8d0f3fcf58aa0884 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 12 Aug 2021 14:44:52 +0200 Subject: drm/i915: Add pci ids and uapi for DG1 DG1 has support for local memory, which requires the usage of the lmem placement extension for creating bo's, and memregion queries to obtain the size. Because of this, those parts of the uapi are no longer guarded behind FAKE_LMEM. According to the pull request referenced below, mesa should be mostly ready for DG1. VK_EXT_memory_budget is not hooked up yet, but we should definitely just enable the uapi parts by default. Signed-off-by: Maarten Lankhorst References: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11584 Cc: Jordan Justen Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210812124452.622233-2-maarten.lankhorst@linux.intel.com Acked-by: Daniel Vetter Acked-by: Jason Ekstrand --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 3 --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_query.c | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 23fee13a3384..1d341b8c47c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -347,9 +347,6 @@ static int ext_set_placements(struct i915_user_extension __user *base, { struct drm_i915_gem_create_ext_memory_regions ext; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (copy_from_user(&ext, base, sizeof(ext))) return -EFAULT; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 32358f90b920..607459251b49 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1096,6 +1096,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_RKL_IDS(&rkl_info), INTEL_ADLS_IDS(&adl_s_info), INTEL_ADLP_IDS(&adl_p_info), + INTEL_DG1_IDS(&dg1_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index e49da36c62fb..5e2b909827f4 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -432,9 +432,6 @@ static int query_memregion_info(struct drm_i915_private *i915, u32 total_length; int ret, id, i; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (query_item->flags != 0) return -EINVAL; -- cgit v1.2.3-70-g09d2 From 90fd2194a0cc52eb7a61dfa6412a0e498c58c688 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Aug 2021 16:33:57 -0700 Subject: drm/i915: Use designated initializers for init/exit table The kernel builds with -Werror=designated-init, and __designated_init is used by CONFIG_GCC_PLUGIN_RANDSTRUCT for automatically selected (all function pointer) structures. Include the field names in the init/exit table. Avoids warnings like: drivers/gpu/drm/i915/i915_module.c:59:4: error: positional initialization of field in 'struct' declared with 'designated_init' attribute [-Werror=designated-init] Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Fixes: a04ea6ae7c67 ("drm/i915: Use a table for i915_init/exit (v2)") Signed-off-by: Kees Cook Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210817233357.2379455-1-keescook@chromium.org --- drivers/gpu/drm/i915/i915_module.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index c578ea8f56a0..d8b4482c69d0 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -47,19 +47,30 @@ static const struct { int (*init)(void); void (*exit)(void); } init_funcs[] = { - { i915_check_nomodeset, NULL }, - { i915_active_module_init, i915_active_module_exit }, - { i915_buddy_module_init, i915_buddy_module_exit }, - { i915_context_module_init, i915_context_module_exit }, - { i915_gem_context_module_init, i915_gem_context_module_exit }, - { i915_objects_module_init, i915_objects_module_exit }, - { i915_request_module_init, i915_request_module_exit }, - { i915_scheduler_module_init, i915_scheduler_module_exit }, - { i915_vma_module_init, i915_vma_module_exit }, - { i915_mock_selftests, NULL }, - { i915_pmu_init, i915_pmu_exit }, - { i915_register_pci_driver, i915_unregister_pci_driver }, - { i915_perf_sysctl_register, i915_perf_sysctl_unregister }, + { .init = i915_check_nomodeset }, + { .init = i915_active_module_init, + .exit = i915_active_module_exit }, + { .init = i915_buddy_module_init, + .exit = i915_buddy_module_exit }, + { .init = i915_context_module_init, + .exit = i915_context_module_exit }, + { .init = i915_gem_context_module_init, + .exit = i915_gem_context_module_exit }, + { .init = i915_objects_module_init, + .exit = i915_objects_module_exit }, + { .init = i915_request_module_init, + .exit = i915_request_module_exit }, + { .init = i915_scheduler_module_init, + .exit = i915_scheduler_module_exit }, + { .init = i915_vma_module_init, + .exit = i915_vma_module_exit }, + { .init = i915_mock_selftests }, + { .init = i915_pmu_init, + .exit = i915_pmu_exit }, + { .init = i915_register_pci_driver, + .exit = i915_unregister_pci_driver }, + { .init = i915_perf_sysctl_register, + .exit = i915_perf_sysctl_unregister }, }; static int init_progress; -- cgit v1.2.3-70-g09d2 From 9e9dfd080201ec6236df7151fb7127fe9c594996 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:46 -0700 Subject: drm/i915/dg2: Maintain backward-compatible nested batch behavior For tgl+, the per-context setting of MI_MODE[12] determines whether the bits of a nested MI_BATCH_BUFFER_START instruction should be interpreted in the traditional manner or whether they should instead use a new tgl+ meaning that breaks backward compatibility, but allows nesting into 3rd-level batchbuffers. For previous platforms, the hardware default for this register bit is to maintain backward-compatible behavior unless a context intentionally opts into the new behavior; however Xe_HPG flips the hardware default behavior. From a SW perspective, we want to maintain the backward-compatible behavior for userspace, so we'll apply a fake workaround to set it back to the legacy behavior on platforms where the hardware default is to break compatibility. At the moment there is no Linux userspace that utilizes third-level batchbuffers, so this will avoid userspace from needing to make any changes. using the legacy meaning is the correct thing to do. If/when we have userspace consumers that want to utilize third-level batch nesting, we can provide a context parameter to allow them to opt-in. Bspec: 45974, 45718 Cc: John Harrison Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-9-matthew.d.roper@intel.com Reviewed-by: Anusha Srivatsa --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 39 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 38 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e9b8af9f08ea..688ed04edbf6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -644,6 +644,37 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); } +static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + /* + * This is a "fake" workaround defined by software to ensure we + * maintain reliable, backward-compatible behavior for userspace with + * regards to how nested MI_BATCH_BUFFER_START commands are handled. + * + * The per-context setting of MI_MODE[12] determines whether the bits + * of a nested MI_BATCH_BUFFER_START instruction should be interpreted + * in the traditional manner or whether they should instead use a new + * tgl+ meaning that breaks backward compatibility, but allows nesting + * into 3rd-level batchbuffers. When this new capability was first + * added in TGL, it remained off by default unless a context + * intentionally opted in to the new behavior. However Xe_HPG now + * flips this on by default and requires that we explicitly opt out if + * we don't want the new behavior. + * + * From a SW perspective, we want to maintain the backward-compatible + * behavior for userspace, so we'll apply a fake workaround to set it + * back to the legacy behavior on platforms where the hardware default + * is to break compatibility. At the moment there is no Linux + * userspace that utilizes third-level batchbuffers, so this will avoid + * userspace from needing to make any changes. using the legacy + * meaning is the correct thing to do. If/when we have userspace + * consumers that want to utilize third-level batch nesting, we can + * provide a context parameter to allow them to opt-in. + */ + wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -651,11 +682,15 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, { struct drm_i915_private *i915 = engine->i915; + wa_init_start(wal, name, engine->name); + + /* Applies to all engines */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + if (engine->class != RENDER_CLASS) return; - wa_init_start(wal, name, engine->name); - if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f79f02ee12db..b289f5cefa59 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2830,6 +2830,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_MODE _MMIO(0x209c) # define VS_TIMER_DISPATCH (1 << 6) # define MI_FLUSH_ENABLE (1 << 12) +# define TGL_NESTED_BB_EN (1 << 12) # define ASYNC_FLIP_PERF_DISABLE (1 << 14) # define MODE_IDLE (1 << 9) # define STOP_RING (1 << 8) -- cgit v1.2.3-70-g09d2 From faf890985e30d5e88cc3a7c50c1bcad32f89ab7c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 30 Jul 2021 12:53:42 -0700 Subject: drm/i915: Fix syncmap memory leak A small race exists between intel_gt_retire_requests_timeout and intel_timeline_exit which could result in the syncmap not getting free'd. Rather than work to hard to seal this race, simply cleanup the syncmap on fini. unreferenced object 0xffff88813bc53b18 (size 96): comm "gem_close_race", pid 5410, jiffies 4294917818 (age 1105.600s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 ................ 00 00 00 00 00 00 00 00 6b 6b 6b 6b 06 00 00 00 ........kkkk.... backtrace: [<00000000120b863a>] __sync_alloc_leaf+0x1e/0x40 [i915] [<00000000042f6959>] __sync_set+0x1bb/0x240 [i915] [<0000000090f0e90f>] i915_request_await_dma_fence+0x1c7/0x400 [i915] [<0000000056a48219>] i915_request_await_object+0x222/0x360 [i915] [<00000000aaac4ee3>] i915_gem_do_execbuffer+0x1bd0/0x2250 [i915] [<000000003c9d830f>] i915_gem_execbuffer2_ioctl+0x405/0xce0 [i915] [<00000000fd7a8e68>] drm_ioctl_kernel+0xb0/0xf0 [drm] [<00000000e721ee87>] drm_ioctl+0x305/0x3c0 [drm] [<000000008b0d8986>] __x64_sys_ioctl+0x71/0xb0 [<0000000076c362a4>] do_syscall_64+0x33/0x80 [<00000000eb7a4831>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Matthew Brost Fixes: 531958f6f357 ("drm/i915/gt: Track timeline activeness in enter/exit") Cc: Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730195342.110234-1-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_timeline.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index c4a126c8caef..1257f4f11e66 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -127,6 +127,15 @@ static void intel_timeline_fini(struct rcu_head *rcu) i915_vma_put(timeline->hwsp_ggtt); i915_active_fini(&timeline->active); + + /* + * A small race exists between intel_gt_retire_requests_timeout and + * intel_timeline_exit which could result in the syncmap not getting + * free'd. Rather than work to hard to seal this race, simply cleanup + * the syncmap on fini. + */ + i915_syncmap_free(&timeline->sync); + kfree(timeline); } -- cgit v1.2.3-70-g09d2 From c9b6e94963bc3a53110f1c9cd7e5e4ae571413cd Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 16 Aug 2021 19:14:44 +0200 Subject: drm/i915: Ditch the i915_gem_ww_ctx loop member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's only used by the for_i915_gem_ww() macro and we can use the (typically) on-stack _err variable in its place. v2: - Don't clear the _err variable when entering the loop (Matthew Auld, Maarten Lankhorst). - Use parentheses around the _err macro argument. - Fix up comment. Cc: Matthew Auld Suggested-by: Maarten Lankhorst Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210816171444.105469-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_ww.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h index f6b1a796667b..86f0fe343de6 100644 --- a/drivers/gpu/drm/i915/i915_gem_ww.h +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -11,8 +11,7 @@ struct i915_gem_ww_ctx { struct ww_acquire_ctx ctx; struct list_head obj_list; struct drm_i915_gem_object *contended; - unsigned short intr; - unsigned short loop; + bool intr; }; void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); @@ -20,31 +19,23 @@ void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); -/* Internal functions used by the inlines! Don't use. */ +/* Internal function used by the inlines! Don't use. */ static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err) { - ww->loop = 0; if (err == -EDEADLK) { err = i915_gem_ww_ctx_backoff(ww); if (!err) - ww->loop = 1; + err = -EDEADLK; } - if (!ww->loop) + if (err != -EDEADLK) i915_gem_ww_ctx_fini(ww); return err; } -static inline void -__i915_gem_ww_init(struct i915_gem_ww_ctx *ww, bool intr) -{ - i915_gem_ww_ctx_init(ww, intr); - ww->loop = 1; -} - -#define for_i915_gem_ww(_ww, _err, _intr) \ - for (__i915_gem_ww_init(_ww, _intr); (_ww)->loop; \ - _err = __i915_gem_ww_fini(_ww, _err)) - +#define for_i915_gem_ww(_ww, _err, _intr) \ + for (i915_gem_ww_ctx_init(_ww, _intr), (_err) = -EDEADLK; \ + (_err) == -EDEADLK; \ + (_err) = __i915_gem_ww_fini(_ww, _err)) #endif -- cgit v1.2.3-70-g09d2 From 5359b745146aa596026addffd7843e1735db7bbd Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 19 Aug 2021 10:34:18 +0100 Subject: drm/i915/buddy: add some pretty printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the debug hook for the buddy resource manager. For this we want to print out the status of the memory manager, including how much memory is still allocatable, what page sizes we have etc. This will be triggered when TTM is unable to fulfil an allocation request for device local-memory. v2(Thomas): - s/MB/MiB - s/KB/KiB Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210819093419.295636-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_buddy.c | 45 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_buddy.h | 8 +++++ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 20 +++++++++++- 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index 7b274c51cac0..6e2ad68f8f3f 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -4,6 +4,7 @@ */ #include +#include #include "i915_buddy.h" @@ -82,6 +83,7 @@ int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) size = round_down(size, chunk_size); mm->size = size; + mm->avail = size; mm->chunk_size = chunk_size; mm->max_order = ilog2(size) - ilog2(chunk_size); @@ -155,6 +157,8 @@ void i915_buddy_fini(struct i915_buddy_mm *mm) i915_block_free(mm, mm->roots[i]); } + GEM_WARN_ON(mm->avail != mm->size); + kfree(mm->roots); kfree(mm->free_list); } @@ -230,6 +234,7 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block) { GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + mm->avail += i915_buddy_block_size(mm, block); __i915_buddy_free(mm, block); } @@ -283,6 +288,7 @@ i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) } mark_allocated(block); + mm->avail -= i915_buddy_block_size(mm, block); kmemleak_update_trace(block); return block; @@ -368,6 +374,7 @@ int i915_buddy_alloc_range(struct i915_buddy_mm *mm, } mark_allocated(block); + mm->avail -= i915_buddy_block_size(mm, block); list_add_tail(&block->link, &allocated); continue; } @@ -402,6 +409,44 @@ err_free: return err; } +void i915_buddy_block_print(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + struct drm_printer *p) +{ + u64 start = i915_buddy_block_offset(block); + u64 size = i915_buddy_block_size(mm, block); + + drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); +} + +void i915_buddy_print(struct i915_buddy_mm *mm, struct drm_printer *p) +{ + int order; + + drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20); + + for (order = mm->max_order; order >= 0; order--) { + struct i915_buddy_block *block; + u64 count = 0, free; + + list_for_each_entry(block, &mm->free_list[order], link) { + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + count++; + } + + drm_printf(p, "order-%d ", order); + + free = count * (mm->chunk_size << order); + if (free < SZ_1M) + drm_printf(p, "free: %lluKiB", free >> 10); + else + drm_printf(p, "free: %lluMiB", free >> 20); + + drm_printf(p, ", pages: %llu\n", count); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_buddy.c" #endif diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h index 3940d632f208..7077742112ac 100644 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -10,6 +10,8 @@ #include #include +#include + struct i915_buddy_block { #define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) #define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) @@ -69,6 +71,7 @@ struct i915_buddy_mm { /* Must be at least PAGE_SIZE */ u64 chunk_size; u64 size; + u64 avail; }; static inline u64 @@ -129,6 +132,11 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); +void i915_buddy_print(struct i915_buddy_mm *mm, struct drm_printer *p); +void i915_buddy_block_print(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + struct drm_printer *p); + void i915_buddy_module_exit(void); int i915_buddy_module_init(void); diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 6877362f6b85..d59fbb019032 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -126,12 +126,30 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, kfree(bman_res); } +static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_block *block; + + mutex_lock(&bman->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + bman->default_page_size >> 10); + + i915_buddy_print(&bman->mm, printer); + + drm_printf(printer, "reserved:\n"); + list_for_each_entry(block, &bman->reserved, link) + i915_buddy_block_print(&bman->mm, block, printer); + mutex_unlock(&bman->lock); +} + static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { .alloc = i915_ttm_buddy_man_alloc, .free = i915_ttm_buddy_man_free, + .debug = i915_ttm_buddy_man_debug, }; - /** * i915_ttm_buddy_man_init - Setup buddy allocator based ttm manager * @bdev: The ttm device -- cgit v1.2.3-70-g09d2 From 8c3363c67b885fe40f50a8010a0768c4fd1e4b95 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 19 Aug 2021 10:34:19 +0100 Subject: drm/i915/debugfs: hook up ttm_resource_manager_debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should give a more complete view of the various bits of internal resource manager state, for device local-memory. v2(Thomas): - Move the region printing into a nice helper Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210819093419.295636-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/intel_memory_region.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_memory_region.h | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6c83da3956b9..2829df0aa65c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -238,6 +238,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); struct intel_memory_region *mr; enum intel_region_id id; @@ -246,8 +247,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) atomic_read(&i915->mm.free_count), i915->mm.shrink_memory); for_each_memory_region(mr, i915, id) - seq_printf(m, "%s: total:%pa, available:%pa bytes\n", - mr->name, &mr->total, &mr->avail); + intel_memory_region_debug(mr, &p); return 0; } diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 779eb2fa90b6..e7f7e6627750 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -78,6 +78,18 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, return i915_ttm_buddy_man_reserve(man, offset, size); } +void intel_memory_region_debug(struct intel_memory_region *mr, + struct drm_printer *printer) +{ + drm_printf(printer, "%s: ", mr->name); + + if (mr->region_private) + ttm_resource_manager_debug(mr->region_private, printer); + else + drm_printf(printer, "total:%pa, available:%pa bytes\n", + &mr->total, &mr->avail); +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 1f2b96efa69d..3feae3353d33 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -15,6 +15,7 @@ struct drm_i915_private; struct drm_i915_gem_object; +struct drm_printer; struct intel_memory_region; struct sg_table; struct ttm_resource; @@ -127,6 +128,9 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); +void intel_memory_region_debug(struct intel_memory_region *mr, + struct drm_printer *printer); + struct intel_memory_region * i915_gem_ttm_system_setup(struct drm_i915_private *i915, u16 type, u16 instance); -- cgit v1.2.3-70-g09d2 From ff12ce2c9cb1cd09017151424db66de803984abc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Aug 2021 14:36:00 +0300 Subject: drm/i915/gt: Potential error pointer dereference in pinned_context() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the intel_engine_create_pinned_context() function returns an error pointer, then dereferencing "ce" will Oops. Use "vm" instead of "ce->vm". Fixes: cf586021642d ("drm/i915/gt: Pipelined page migration") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210813113600.GC30697@kili --- drivers/gpu/drm/i915/gt/intel_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index d0a7c934fd3b..1dac21aa7e5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -177,7 +177,7 @@ static struct intel_context *pinned_context(struct intel_gt *gt) ce = intel_engine_create_pinned_context(engine, vm, SZ_512K, I915_GEM_HWS_MIGRATE, &key, "migrate"); - i915_vm_put(ce->vm); + i915_vm_put(vm); return ce; } -- cgit v1.2.3-70-g09d2 From 81a14bedae5ba88e2e2c6a53fd8f62dddabf51d2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 19 Aug 2021 14:03:49 -0700 Subject: drm/i915/dg1: remove __maybe_unused leftover This was added in commit 05e265841f7e ("drm/i915/dg1: add initial DG-1 definitions") so we could continue to add support for DG1 without risk to expose a broken UAPI. Now that we added DG1 to the PCI ID list i915 may bind to, remove the leftover. Fixes: d5ef86b38e4c ("drm/i915: Add pci ids and uapi for DG1") Signed-off-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210819210349.95103-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 607459251b49..3fb55e2404b6 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -909,7 +909,7 @@ static const struct intel_device_info rkl_info = { .has_snoop = 1, \ .is_dgfx = 1 -static const struct intel_device_info dg1_info __maybe_unused = { +static const struct intel_device_info dg1_info = { GEN12_FEATURES, DGFX_FEATURES, PLATFORM(INTEL_DG1), -- cgit v1.2.3-70-g09d2 From ac5a2dff428ab59b5c5bbb4b28311141aa461c07 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 13 Aug 2021 10:11:58 -0700 Subject: drm/i915/selftest: Fix use of err in igt_reset_{fail, nop}_engine() Clang warns: In file included from drivers/gpu/drm/i915/gt/intel_reset.c:1514: drivers/gpu/drm/i915/gt/selftest_hangcheck.c:465:62: warning: variable 'err' is uninitialized when used here [-Wuninitialized] pr_err("[%s] Create context failed: %d!\n", engine->name, err); ^~~ ... drivers/gpu/drm/i915/gt/selftest_hangcheck.c:580:62: warning: variable 'err' is uninitialized when used here [-Wuninitialized] pr_err("[%s] Create context failed: %d!\n", engine->name, err); ^~~ ... 2 warnings generated. This appears to be a copy and paste issue. Use ce directly using the %pe specifier to pretty print the error code so that err is not used uninitialized in these functions. Fixes: 3a7b72665ea5 ("drm/i915/selftest: Bump selftest timeouts for hangcheck") Reported-by: Dan Carpenter Signed-off-by: Nathan Chancellor Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20210813171158.2665823-1-nathan@kernel.org --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 08f011f893b2..2c1ed32ca5ac 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -462,7 +462,7 @@ static int igt_reset_nop_engine(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { - pr_err("[%s] Create context failed: %d!\n", engine->name, err); + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); } @@ -577,7 +577,7 @@ static int igt_reset_fail_engine(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { - pr_err("[%s] Create context failed: %d!\n", engine->name, err); + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); } -- cgit v1.2.3-70-g09d2 From 6321a722374bf23b09095cf3077c59cf5e6d3a78 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Jul 2021 19:18:15 +0300 Subject: drm/i915: s/0/NULL/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use NULL where appropriate. drivers/gpu/drm/i915/gt/intel_ring_submission.c:1210:24: warning: Using plain integer as NULL pointer Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210712161815.24776-2-ville.syrjala@linux.intel.com Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2958e2fae380..3c65efcb7bed 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1265,7 +1265,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) int size, err; if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS) - return 0; + return NULL; err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); if (err < 0) -- cgit v1.2.3-70-g09d2 From 5e076529e2652244ec20a86d8f99ba634a16c4f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 20:17:03 -0700 Subject: drm/i915/selftests: Increase timeout in i915_gem_contexts selftests Like in the case of several other selftests, generating lots of requests in a loop takes a bit longer with GuC submission. Increase a timeout in i915_gem_contexts selftest to take this into account. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727031703.40395-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8eb5050f8cb3..4d2758718d21 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -94,7 +94,7 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + if (i915_request_wait(rq, 0, HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); i915_request_put(rq); -- cgit v1.2.3-70-g09d2 From ae4b0eacaffe6b69ace47b224909bf757767d40b Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Thu, 5 Aug 2021 09:36:45 -0700 Subject: drm/i915/dg2: Add new LRI reg offsets New LRI register offsets were introduced for DG2, this patch adds those extra registers, and create new register table for setting offsets to compare with HW generated context image - especially for gt_lrc test. Also updates general purpose register with scratch offset for DG2, in order to use it for live_lrc_fixed selftest. Cc: Chris P Wilson Cc: Prathap Kumar Valsan Signed-off-by: Akeem G Abodunrin Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 85 ++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bb4af4977920..6ba8daea2f56 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -226,6 +226,40 @@ static const u8 gen12_xcs_offsets[] = { END }; +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -525,6 +559,49 @@ static const u8 xehp_rcs_offsets[] = { END }; +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -543,7 +620,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; @@ -554,7 +633,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_xcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 9) return gen9_xcs_offsets; -- cgit v1.2.3-70-g09d2 From d8ac30fd479cf0f0b37c7f06b06b50c18f57c548 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 13 Aug 2021 16:43:30 +0200 Subject: drm/i915/ttm: Reorganize the ttm move code somewhat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear. v2: - Don't set up the memcpy iterators unless we are actually going to memcpy. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20210813144331.372957-2-thomas.hellstrom@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20210813144331.372957-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 77 +++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 771eb2963123..d07de18529ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, } static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, struct ttm_resource *dst_mem, struct sg_table *dst_st) { @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return -EINVAL; dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); - if (!ttm || !ttm_tt_is_populated(ttm)) { + if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return 0; - intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, dst_st->sgl, dst_level, @@ -489,6 +487,41 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return ret; } +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + int ret; + + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (ret) { + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct intel_memory_region *dst_reg, *src_reg; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } +} + static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_resource *dst_mem, @@ -497,19 +530,11 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct ttm_resource_manager *dst_man = ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct intel_memory_region *dst_reg, *src_reg; - union { - struct ttm_kmap_iter_tt tt; - struct ttm_kmap_iter_iomap io; - } _dst_iter, _src_iter; - struct ttm_kmap_iter *dst_iter, *src_iter; + struct ttm_tt *ttm = bo->ttm; struct sg_table *dst_st; + bool clear; int ret; - dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); - src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); - GEM_BUG_ON(!dst_reg || !src_reg); - /* Sync for now. We could do the actual copy async. */ ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -526,9 +551,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, } /* Populate ttm with pages if needed. Typically system memory. */ - if (bo->ttm && (dst_man->use_tt || - (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); if (ret) return ret; } @@ -537,23 +561,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - ret = i915_ttm_accel_move(bo, dst_mem, dst_st); - if (ret) { - /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); - - src_iter = !cpu_maps_iomem(bo->resource) ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); + clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + __i915_ttm_move(bo, clear, dst_mem, dst_st); - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); - } - /* Below dst_mem becomes bo->resource. */ ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_free_cached_io_st(obj); -- cgit v1.2.3-70-g09d2 From 669076334bfa7915e6856cf49c6408a2ec07df7a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 13 Aug 2021 16:43:31 +0200 Subject: drm/ttm, drm/i915: Update ttm_move_memcpy for async use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer object argument to ttm_move_memcpy was only used to determine whether the destination memory should be cleared only or whether we should copy data. Replace it with a "clear" bool, and update the callers. The intention here is to be able to use ttm_move_memcpy() async under a dma-fence as a fallback if an accelerated blit fails in a security- critical path where data might leak if the blit is not properly performed. For that purpose the bo is an unsuitable argument since its relevant members might already have changed at call time. Finally, update the ttm_move_memcpy kerneldoc that seems to have ended up with a stale version. Cc: Christian König Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Christian König Link: https://lore.kernel.org/r/20210813144331.372957-3-thomas.hellstrom@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20210813144331.372957-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 ++++++++++---------- include/drm/ttm/ttm_bo_driver.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index d07de18529ab..6995c66cbe21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -518,7 +518,7 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, obj->ttm.cached_io_st, src_reg->region.start); - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); } } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..e3747f069674 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -75,22 +75,21 @@ void ttm_mem_io_free(struct ttm_device *bdev, /** * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. - * @bo: The struct ttm_buffer_object. - * @new_mem: The struct ttm_resource we're moving to (copy destination). - * @new_iter: A struct ttm_kmap_iter representing the destination resource. + * @clear: Whether to clear rather than copy. + * @num_pages: Number of pages of the operation. + * @dst_iter: A struct ttm_kmap_iter representing the destination resource. * @src_iter: A struct ttm_kmap_iter representing the source resource. * * This function is intended to be able to move out async under a * dma-fence if desired. */ -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter) { const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; - struct ttm_tt *ttm = bo->ttm; struct dma_buf_map src_map, dst_map; pgoff_t i; @@ -99,10 +98,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, return; /* Don't move nonexistent data. Clear destination instead. */ - if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) { - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return; - + if (clear) { for (i = 0; i < num_pages; ++i) { dst_ops->map_local(dst_iter, &dst_map, i); if (dst_map.is_iomem) @@ -146,6 +142,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_kmap_iter_linear_io io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; + bool clear; int ret = 0; if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) || @@ -169,7 +166,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, goto out_src_iter; } - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); + src_copy = *src_mem; ttm_bo_move_sync_cleanup(bo, dst_mem); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 68d6069572aa..5f087575194b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter); -- cgit v1.2.3-70-g09d2 From f3ede209d44d71636890a78fa89c5b1c83340320 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 25 Aug 2021 18:06:23 +0300 Subject: drm/i915/pci: rename functions to have i915_pci prefix Follow the usual naming conventions. While at it, fix i915_pci.h SPDX license comment format and add header include guards. Cc: Daniel Vetter Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210825150623.28980-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_module.c | 4 ++-- drivers/gpu/drm/i915/i915_pci.c | 4 ++-- drivers/gpu/drm/i915/i915_pci.h | 12 ++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index d8b4482c69d0..ab2295dd4500 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -67,8 +67,8 @@ static const struct { { .init = i915_mock_selftests }, { .init = i915_pmu_init, .exit = i915_pmu_exit }, - { .init = i915_register_pci_driver, - .exit = i915_unregister_pci_driver }, + { .init = i915_pci_register_driver, + .exit = i915_pci_unregister_driver }, { .init = i915_perf_sysctl_register, .exit = i915_perf_sysctl_unregister }, }; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 3fb55e2404b6..f0e2f5e67bda 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1216,12 +1216,12 @@ static struct pci_driver i915_pci_driver = { .driver.pm = &i915_pm_ops, }; -int i915_register_pci_driver(void) +int i915_pci_register_driver(void) { return pci_register_driver(&i915_pci_driver); } -void i915_unregister_pci_driver(void) +void i915_pci_unregister_driver(void) { pci_unregister_driver(&i915_pci_driver); } diff --git a/drivers/gpu/drm/i915/i915_pci.h b/drivers/gpu/drm/i915/i915_pci.h index b386f319f52e..ee048c238174 100644 --- a/drivers/gpu/drm/i915/i915_pci.h +++ b/drivers/gpu/drm/i915/i915_pci.h @@ -1,8 +1,12 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2021 Intel Corporation */ -int i915_register_pci_driver(void); -void i915_unregister_pci_driver(void); +#ifndef __I915_PCI_H__ +#define __I915_PCI_H__ + +int i915_pci_register_driver(void); +void i915_pci_unregister_driver(void); + +#endif /* __I915_PCI_H__ */ -- cgit v1.2.3-70-g09d2 From ba3d8257f2d94ab227af880e3e40868c80ad8d93 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Aug 2021 20:35:59 -0700 Subject: drm/i915: Ensure wa_init_finish() is called for ctx workaround list A recent restructuring of our context workaround list initialization added an early return for non-render engines; this caused us to potentially miss the wa_init_finish() call at the end of the function. The mistake is pretty harmless --- the only impact is that non-render engines on graphics version 12.50+ platforms we don't trim down the workaround list to reclaim some memory, and we don't print the usual "Initialized 1 context workaround" message in dmesg. Let's change the early return to a jump down to the wa_init_finish() call at the bottom of the function. Reported-by: Tvrtko Ursulin Fixes: 9e9dfd080201 ("drm/i915/dg2: Maintain backward-compatible nested batch behavior") Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210826033559.1209020-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 688ed04edbf6..94e1937f8d29 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -689,7 +689,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, fakewa_disable_nestedbb_mode(engine, wal); if (engine->class != RENDER_CLASS) - return; + goto done; if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); @@ -720,6 +720,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, else MISSING_CASE(GRAPHICS_VER(i915)); +done: wa_init_finish(wal); } -- cgit v1.2.3-70-g09d2 From f123efebe4361b9b16975fcc3dbc0a6331fa6a14 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 20 Aug 2021 17:49:32 +0200 Subject: drm/i915: Actually delete gpu reloc selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 8e02cceb1f1f ("drm/i915: delete gpu reloc code") I deleted the gpu relocation code and the selftest include and enabling, but accidentally forgot about the selftest source code. Fix this oversight. Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210820154932.296628-1-daniel.vetter@ffwll.ch --- .../drm/i915/gem/selftests/i915_gem_execbuffer.c | 190 --------------------- 1 file changed, 190 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c deleted file mode 100644 index 16162fc2782d..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2020 Intel Corporation - */ - -#include "i915_selftest.h" - -#include "gt/intel_engine_pm.h" -#include "selftests/igt_flush_test.h" - -static u64 read_reloc(const u32 *map, int x, const u64 mask) -{ - u64 reloc; - - memcpy(&reloc, &map[x], sizeof(reloc)); - return reloc & mask; -} - -static int __igt_gpu_reloc(struct i915_execbuffer *eb, - struct drm_i915_gem_object *obj) -{ - const unsigned int offsets[] = { 8, 3, 0 }; - const u64 mask = - GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0); - const u32 *map = page_mask_bits(obj->mm.mapping); - struct i915_request *rq; - struct i915_vma *vma; - int err; - int i; - - vma = i915_vma_instance(obj, eb->context->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_lock(obj, &eb->ww); - if (err) - return err; - - err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH); - if (err) - return err; - - /* 8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); - if (err <= 0) - goto reloc_err; - - /* !8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); - if (err <= 0) - goto reloc_err; - - /* Skip to the end of the cmd page */ - i = PAGE_SIZE / sizeof(u32) - 1; - i -= eb->reloc_cache.rq_size; - memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, - MI_NOOP, i); - eb->reloc_cache.rq_size += i; - - /* Force next batch */ - err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); - if (err <= 0) - goto reloc_err; - - GEM_BUG_ON(!eb->reloc_cache.rq); - rq = i915_request_get(eb->reloc_cache.rq); - reloc_gpu_flush(eb, &eb->reloc_cache); - GEM_BUG_ON(eb->reloc_cache.rq); - - err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); - if (err) { - intel_gt_set_wedged(eb->engine->gt); - goto put_rq; - } - - if (!i915_request_completed(rq)) { - pr_err("%s: did not wait for relocations!\n", eb->engine->name); - err = -EINVAL; - goto put_rq; - } - - for (i = 0; i < ARRAY_SIZE(offsets); i++) { - u64 reloc = read_reloc(map, offsets[i], mask); - - if (reloc != i) { - pr_err("%s[%d]: map[%d] %llx != %x\n", - eb->engine->name, i, offsets[i], reloc, i); - err = -EINVAL; - } - } - if (err) - igt_hexdump(map, 4096); - -put_rq: - i915_request_put(rq); -unpin_vma: - i915_vma_unpin(vma); - return err; - -reloc_err: - if (!err) - err = -EIO; - goto unpin_vma; -} - -static int igt_gpu_reloc(void *arg) -{ - struct i915_execbuffer eb; - struct drm_i915_gem_object *scratch; - int err = 0; - u32 *map; - - eb.i915 = arg; - - scratch = i915_gem_object_create_internal(eb.i915, 4096); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - map = i915_gem_object_pin_map_unlocked(scratch, I915_MAP_WC); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto err_scratch; - } - - intel_gt_pm_get(&eb.i915->gt); - - for_each_uabi_engine(eb.engine, eb.i915) { - if (intel_engine_requires_cmd_parser(eb.engine) || - intel_engine_using_cmd_parser(eb.engine)) - continue; - - reloc_cache_init(&eb.reloc_cache, eb.i915); - memset(map, POISON_INUSE, 4096); - - intel_engine_pm_get(eb.engine); - eb.context = intel_context_create(eb.engine); - if (IS_ERR(eb.context)) { - err = PTR_ERR(eb.context); - goto err_pm; - } - eb.reloc_pool = NULL; - eb.reloc_context = NULL; - - i915_gem_ww_ctx_init(&eb.ww, false); -retry: - err = intel_context_pin_ww(eb.context, &eb.ww); - if (!err) { - err = __igt_gpu_reloc(&eb, scratch); - - intel_context_unpin(eb.context); - } - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&eb.ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&eb.ww); - - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); - - intel_context_put(eb.context); -err_pm: - intel_engine_pm_put(eb.engine); - if (err) - break; - } - - if (igt_flush_test(eb.i915)) - err = -EIO; - - intel_gt_pm_put(&eb.i915->gt); -err_scratch: - i915_gem_object_put(scratch); - return err; -} - -int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gpu_reloc), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} -- cgit v1.2.3-70-g09d2 From 5db1856781e45c9610f7652a19cc656b984235e7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 23 Aug 2021 09:31:37 -0700 Subject: drm/i915/guc: drop guc_communication_enabled The function is only used from within GEM_BUG_ON(), which is causing warnings with Wunneeded-internal-declaration in some builds. Since the function is a simple wrapper around a CT function, we can just call the CT function directly instead. Fixes: 1fb12c587152 ("drm/i915/guc: skip disabling CTBs before sanitizing the GuC") Reported-by: kernel test robot Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210823163137.19770-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index b104fb7607eb..86c318516e14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -172,11 +172,6 @@ void intel_uc_driver_remove(struct intel_uc *uc) __uc_free_load_err_log(uc); } -static inline bool guc_communication_enabled(struct intel_guc *guc) -{ - return intel_guc_ct_enabled(&guc->ct); -} - /* * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 * register using the same bits used in the CT message payload. Since our @@ -210,7 +205,7 @@ static void guc_get_mmio_msg(struct intel_guc *guc) static void guc_handle_mmio_msg(struct intel_guc *guc) { /* we need communication to be enabled to reply to GuC */ - GEM_BUG_ON(!guc_communication_enabled(guc)); + GEM_BUG_ON(!intel_guc_ct_enabled(&guc->ct)); spin_lock_irq(&guc->irq_lock); if (guc->mmio_msg) { @@ -226,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc) struct drm_i915_private *i915 = gt->i915; int ret; - GEM_BUG_ON(guc_communication_enabled(guc)); + GEM_BUG_ON(intel_guc_ct_enabled(&guc->ct)); ret = i915_inject_probe_error(i915, -ENXIO); if (ret) @@ -662,7 +657,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) return 0; /* Make sure we enable communication if and only if it's disabled */ - GEM_BUG_ON(enable_communication == guc_communication_enabled(guc)); + GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct)); if (enable_communication) guc_enable_communication(guc); -- cgit v1.2.3-70-g09d2 From 450cede7f3804ca7f8b3da210ebefa61c0958f22 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 31 Aug 2021 14:29:31 +0200 Subject: drm/i915/gem: Fix the mman selftest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the I915_MMAP_TYPE_FIXED mmap type requires the TTM backend, so for that mmap type, use __i915_gem_object_create_user() instead of i915_gem_object_create_internal(), as we really want to tests objects mmap-able by user-space. This also means that the out-of-space error happens at object creation and returns -ENXIO rather than -ENOSPC, so fix the code up to expect that on out-of-offset-space errors. Finally only use I915_MMAP_TYPE_FIXED for LMEM and SMEM for now if testing on LMEM-capable devices. For stolen LMEM, we still take the same path as for integrated, as that haven't been moved over to TTM yet, and user-space should not be able to create out of stolen LMEM anyway. v2: - Check the presence of the obj->ops->mmap_offset callback rather than hardcoding the supported mmap regions in can_mmap() (Maarten Lankhorst) Fixes: 7961c5b60f23 ("drm/i915: Add TTM offset argument to mmap.") Cc: Maarten Lankhorst Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210831122931.157536-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 26 +++++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b20f5621f62b..a2c34e5a1c54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -581,6 +581,20 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) return I915_MMAP_TYPE_GTT; } +static struct drm_i915_gem_object * +create_sys_or_internal(struct drm_i915_private *i915, + unsigned long size) +{ + if (HAS_LMEM(i915)) { + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; + + return __i915_gem_object_create_user(i915, size, &sys_region, 1); + } + + return i915_gem_object_create_internal(i915, size); +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) @@ -589,7 +603,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = i915_gem_object_create_internal(i915, size); + obj = create_sys_or_internal(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -633,6 +647,7 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm_node *hole, *next; int loop, err = 0; u64 offset; + int enospc = HAS_LMEM(i915) ? -ENXIO : -ENOSPC; /* Disable background reaper */ disable_retire_worker(i915); @@ -683,14 +698,14 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Too large */ - if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); err = -EINVAL; goto out; } /* Fill the hole, further allocation attempts should then fail */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = create_sys_or_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); @@ -703,7 +718,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto err_obj; } - if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); err = -EINVAL; goto err_obj; @@ -839,10 +854,9 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; - if (HAS_LMEM(i915)) + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) return false; -- cgit v1.2.3-70-g09d2 From b62aa57e3c78d749a1932b636c8fa4e1ef655f4d Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:49 +0530 Subject: drm/i915/gt: Add support of mocs propagation Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. V2: Changed 'i915_mocs_index_gt' to anonymous structure. Cc: CQ Tang Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-2-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 ++++ drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +++++++++++++ drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + 4 files changed, 20 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c986642..2aeaae036a6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } + set_mocs_index(gt); + err = intel_engines_init(gt); if (err) goto err_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1..6fdcde64c180 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d..7ccac15d9a33 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,14 +341,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; + table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -504,6 +509,14 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +void set_mocs_index(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; + + get_mocs_settings(gt->i915, &table); + gt->mocs.uc_index = table.uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index d83274f5163b..8a09d64b115f 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,5 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +void set_mocs_index(struct intel_gt *gt); #endif -- cgit v1.2.3-70-g09d2 From d79a1d71318014066b6e1c78e5457a105d67f2ea Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:50 +0530 Subject: drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward Cache-control registers for Command Stream(CMD_CCTL) are used to set catchability for memory writes and reads outputted by Command Streamers on Gen12 onward platforms. These registers need to point un-cached(UC) MOCS index. Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-3-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 94e1937f8d29..ef7255a44b9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1640,6 +1640,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_NOPID(base))); } +/* + * engine_fake_wa_init(), a place holder to program the registers + * which are not part of an official workaround defined by the + * hardware team. + * Adding programming of those register inside workaround will + * allow utilizing wa framework to proper application and verification. + */ +static void +engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * RING_CMD_CCTL are need to be programed to un-cached + * for memory writes and reads outputted by Command + * Streamers on Gen12 onward platforms. + */ + if (GRAPHICS_VER(engine->i915) >= 12) { + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + RING_CMD_CCTL(engine->mmio_base), + CMD_CCTL_MOCS_MASK, + CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + } +} static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2080,6 +2105,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; + engine_fake_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b289f5cefa59..0f44fcf8d6a4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2560,6 +2560,23 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) #define RING_HWS_PGA_GEN6(base) _MMIO((base) + 0x2080) + +#define RING_CMD_CCTL(base) _MMIO((base) + 0xc4) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. + */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0) +#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \ + CMD_CCTL_READ_OVERRIDE_MASK) +#define CMD_CCTL_MOCS_OVERRIDE(write, read) \ + (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- cgit v1.2.3-70-g09d2 From c6b248489dc3f780ee91e187a1431825d6f298fd Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:51 +0530 Subject: drm/i915/gt: Set BLIT_CCTL reg to un-cached Blitter commands which do not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to a change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-4-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 9 ++++++ 2 files changed, 50 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ef7255a44b9a..c314d4917b6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -675,6 +675,41 @@ static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); } +static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. + */ + if (engine->class == COPY_ENGINE_CLASS) { + mocs = engine->gt->mocs.uc_index; + wa_write_clr_set(wal, + BLIT_CCTL(engine->mmio_base), + BLIT_CCTL_MASK, + BLIT_CCTL_MOCS(mocs, mocs)); + } +} + +/* + * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround + * defined by the hardware team, but it programming general context registers. + * Adding those context register programming in context workaround + * allow us to use the wa framework for proper application and validation. + */ +static void +gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + + gen12_ctx_gt_mocs_init(engine, wal); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -685,8 +720,12 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, wa_init_start(wal, name, engine->name); /* Applies to all engines */ - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) - fakewa_disable_nestedbb_mode(engine, wal); + /* + * Fake workarounds are not the actual workaround but + * programming of context registers using workaround framework. + */ + if (GRAPHICS_VER(i915) >= 12) + gen12_ctx_gt_fake_wa_init(engine, wal); if (engine->class != RENDER_CLASS) goto done; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0f44fcf8d6a4..00b17bc32afb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2577,6 +2577,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base) _MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \ + BLIT_CCTL_SRC_MOCS_MASK) +#define BLIT_CCTL_MOCS(dst, src) \ + (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- cgit v1.2.3-70-g09d2 From cfbe5291a1890b688e6f3accbe2b0e1cf3c601fb Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:52 +0530 Subject: drm/i915/gt: Initialize unused MOCS entries with device specific values Historically we've initialized all undefined/reserved entries in a platform's MOCS table to the contents of table entry #1 (i.e., I915_MOCS_PTE). Going forward, we can't assume that table entry #1 will always contain suitable values to use for undefined/reserved table indices. We'll allow a platform-specific table index to be selected at table initialization time in these cases. This new mechanism to select L3 WB entry will be applicable for all the Gen12+ platforms except TGL and RKL. Since TGL and RLK are already in production so their mocs settings are intact to avoid ABI break. Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-5-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 46 +++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 7ccac15d9a33..552bfd1c113b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -89,18 +90,25 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -283,17 +291,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -343,16 +343,22 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); + table->unused_entries_index = I915_MOCS_PTE; if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; + /* For TGL/RKL, Can't be changed now for ABI reasons */ + if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915)) + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -398,16 +404,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + return table->table[table->unused_entries_index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -422,6 +428,8 @@ static void __init_mocs_table(struct intel_uncore *uncore, unsigned int i; u32 mocs; + drm_WARN_ONCE(&uncore->i915->drm, !table->unused_entries_index, + "Unused entries index should have been defined\n"); for_each_mocs(mocs, table, i) intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); } @@ -448,16 +456,16 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; - - return table->table[I915_MOCS_PTE].l3cc_value; + return table->table[table->unused_entries_index].l3cc_value; } static u32 l3cc_combine(u16 low, u16 high) -- cgit v1.2.3-70-g09d2 From fb1e95bc2755dd29625c6ba7d553284112761f88 Mon Sep 17 00:00:00 2001 From: Sreedhar Telukuntla Date: Fri, 3 Sep 2021 14:51:53 +0530 Subject: drm/i915/gt: Initialize L3CC table in mocs init Initialize the L3CC table as part of mocs initialization to program LNCFCMOCSx registers so that the mocs settings are available for selection for subsequent memory transactions in the driver load path. We need to keep L3CC initialization in intel_mocs_init_engine() also so that in execlists submission, these registers can be rewritten during engine reset. Reviewed-by: Matt Roper Signed-off-by: Sreedhar Telukuntla Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-6-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 552bfd1c113b..e96afd7beb49 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -481,10 +481,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -509,7 +508,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, &table); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, &table); + init_l3cc_table(engine->uncore, &table); } static u32 global_mocs_offset(void) @@ -536,6 +535,14 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, &table); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, &table, global_mocs_offset()); + + /* + * Initialize the L3CC table as part of mocs initalization to make + * sure the LNCFCMOCSx registers are programmed for the subsequent + * memory transactions including guc transactions + */ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, &table); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- cgit v1.2.3-70-g09d2 From 75eefd82581f32da77d7017d11a932ee12a998eb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:47 +0200 Subject: drm/i915: Release i915_gem_context from a worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context. Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure. Put a FIXME comment in when this should be removable again. v2: Fix mock_context(), noticed by intel-gfx-ci. Acked-by: Acked-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ free_engines: return err; } -void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work); trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); } +void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work); spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref; + /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + INIT_WORK(&ctx->release_work, i915_gem_context_release_work); mutex_init(&ctx->mutex); -- cgit v1.2.3-70-g09d2 From c238980efd3b35af70fc926066cf7440f50a97a9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:48 +0200 Subject: drm/i915: Release ctx->syncobj on final put, not on ctx close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gem context refcounting is another exercise in least locking design it seems, where most things get destroyed upon context closure (which can race with anything really). Only the actual memory allocation and the locks survive while holding a reference. This tripped up Jason when reimplementing the single timeline feature in commit 00dae4d3d35d4f526929633b76e00b0ab4d3970d Author: Jason Ekstrand Date: Thu Jul 8 10:48:12 2021 -0500 drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4) We could fix the bug by holding ctx->mutex in execbuf and clear the pointer (again while holding the mutex) context_close, but it's cleaner to just make the context object actually invariant over its _entire_ lifetime. This way any other ioctl that's potentially racing, but holding a full reference, can still rely on ctx->syncobj being an immutable pointer. Which without this change, is not the case. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Fixes: 00dae4d3d35d ("drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)") Cc: Jason Ekstrand Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Cc: Matthew Auld Cc: Maarten Lankhorst Cc: "Thomas Hellström" Cc: Lionel Landwerlin Cc: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 051bc357ff65..5a053cf14948 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -994,6 +994,9 @@ static void i915_gem_context_release_work(struct work_struct *work) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1220,9 +1223,6 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); - if (ctx->syncobj) - drm_syncobj_put(ctx->syncobj); - ctx->file_priv = ERR_PTR(-EBADF); /* -- cgit v1.2.3-70-g09d2 From 8cf97637ff8891be040bac37b96dd97e5996ca93 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:49 +0200 Subject: drm/i915: Keep gem ctx->vm alive until the final put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment added in commit b81dde719439c8f09bb61e742ed95bfc4b33946b Author: Chris Wilson Date: Tue May 21 22:11:29 2019 +0100 drm/i915: Allow userspace to clone contexts on creation and moved in commit 27dbae8f36c1c25008b7885fc07c57054b7dfba3 Author: Chris Wilson Date: Wed Nov 6 09:13:12 2019 +0000 drm/i915/gem: Safely acquire the ctx->vm when copying suggested that i915_address_space were at least intended to be managed through SLAB_TYPESAFE_BY_RCU: * This ppgtt may have be reallocated between * the read and the kref, and reassigned to a third * context. In order to avoid inadvertent sharing * of this ppgtt with that third context (and not * src), we have to confirm that we have the same * ppgtt after passing through the strong memory * barrier implied by a successful * kref_get_unless_zero(). But extensive git history search has not brough any such reuse to light. What has come to light though is that ever since commit 2850748ef8763ab46958e43a4d1c445f29eeb37d Author: Chris Wilson Date: Fri Oct 4 14:39:58 2019 +0100 drm/i915: Pull i915_vma_pin under the vm->mutex (yes this commit is earlier) the final i915_vma_put call has been moved from i915_gem_context_free (now called _release) to context_close, which means it's not actually safe anymore to access the ctx->vm pointer without lock helds, because it might disappear at any moment. Note that superficially things all still work, because the i915_address_space is RCU protected since commit b32fa811156328aea5a3c2ff05cc096490382456 Author: Chris Wilson Date: Thu Jun 20 19:37:05 2019 +0100 drm/i915/gtt: Defer address space cleanup to an RCU worker except the very clever macro above (which is designed to protected against object reuse due to SLAB_TYPESAFE_BY_RCU or similar tricks) results in an endless loop if the refcount of the ctx->vm ever permanently drops to 0. Which it totally now can. Fix that by moving the final i915_vm_put to where it should be. Note that i915_gem_context is rcu protected, but _only_ the final kfree. This means anyone who chases a pointer to a gem ctx solely under the protection can pretty only call kref_get_unless_zero(). This seems to be pretty much the case, aside from a bunch of cases that consult the scheduling information without any further protection. Reviewed-by: Maarten Lankhorst Cc: Jason Ekstrand Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Cc: Matthew Auld Cc: Maarten Lankhorst Cc: "Thomas Hellström" Cc: Lionel Landwerlin Cc: Dave Airlie Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5a053cf14948..12e2de1db1a2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -990,6 +990,7 @@ static void i915_gem_context_release_work(struct work_struct *work) { struct i915_gem_context *ctx = container_of(work, typeof(*ctx), release_work); + struct i915_address_space *vm; trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -997,6 +998,10 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj); + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_put(vm); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1220,8 +1225,15 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx); vm = i915_gem_context_vm(ctx); - if (vm) + if (vm) { + /* i915_vm_close drops the final reference, which is a bit too + * early and could result in surprises with concurrent + * operations racing with thist ctx close. Keep a full reference + * until the end. + */ + i915_vm_get(vm); i915_vm_close(vm); + } ctx->file_priv = ERR_PTR(-EBADF); -- cgit v1.2.3-70-g09d2 From e1068a9e808a14cd532ede325e5e16df45c63f18 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:50 +0200 Subject: drm/i915: Drop code to handle set-vm races from execbuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changing the vm from a finalized gem ctx is no longer possible, which means we don't have to check for that anymore. I was pondering whether to keep the check as a WARN_ON, but things go boom real bad real fast if the vm of a vma is wrong. Plus we'd need to also get the ggtt vm for !full-ppgtt platforms. Ditching it all seemed like a better idea. Reviewed-by: Maarten Lankhorst References: ccbc1b97948a ("drm/i915/gem: Don't allow changing the VM on running contexts (v4)") Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-4-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 45bc4e4ff048..4816359c4c21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -759,11 +759,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb, /* Check that the context hasn't been closed in the meantime */ err = -EINTR; if (!mutex_lock_interruptible(&ctx->lut_mutex)) { - struct i915_address_space *vm = rcu_access_pointer(ctx->vm); - - if (unlikely(vm && vma->vm != vm)) - err = -EAGAIN; /* user racing with ctx set-vm */ - else if (likely(!i915_gem_context_is_closed(ctx))) + if (likely(!i915_gem_context_is_closed(ctx))) err = radix_tree_insert(&ctx->handles_vma, handle, vma); else err = -ENOENT; -- cgit v1.2.3-70-g09d2 From c6d04e48d2e6d0e41c4cc4098c5494713086b597 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:51 +0200 Subject: drm/i915: Rename i915_gem_context_get_vm_rcu to i915_gem_context_get_eb_vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The important part isn't so much that this does an rcu lookup - that's more an implementation detail, which will also be removed. The thing that makes this different from other functions is that it's gettting you the vm that batchbuffers will run in for that gem context, which is either a full ppgtt stored in gem->ctx, or the ggtt. We'll make more use of this function later on. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-5-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 4 ++-- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18060536b0c2..da6e8b506d96 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -155,7 +155,7 @@ i915_gem_context_vm(struct i915_gem_context *ctx) } static inline struct i915_address_space * -i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index a094f3ce1a90..6c68fe26bb32 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1456,7 +1456,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1512,7 +1512,7 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 4d2758718d21..fc7fb33a3a52 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1528,7 +1528,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1607,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..b3863abc51f5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke, if (batch) { struct i915_address_space *vm; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(batch, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2c1ed32ca5ac..8be23e0f9306 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -117,7 +117,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f843a5040706..2d60a5a5b065 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1300,7 +1300,7 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); err = func(vm, 0, min(vm->total, limit), end_time); i915_vm_put(vm); @@ -1848,7 +1848,7 @@ static int igt_cs_tlb(void *arg) goto out_unlock; } - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); if (i915_is_ggtt(vm)) goto out_vm; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index dd0607254a95..79ba72da0813 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -118,7 +118,7 @@ static int create_vmas(struct drm_i915_private *i915, struct i915_vma *vma; int err; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = checked_vma_instance(obj, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) -- cgit v1.2.3-70-g09d2 From 24fad29e52e087317e91f08513b15ff7151d6d32 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:52 +0200 Subject: drm/i915: Use i915_gem_context_get_eb_vm in ctx_getparam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidates the "which is the vm my execbuf runs in" code a bit. We do some get/put which isn't really required, but all the other users want the refcounting, and I figured doing a function just for this getparam to avoid 2 atomis is a bit much. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-6-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 12e2de1db1a2..7a566fb7cca4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2108,6 +2108,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; struct i915_gem_context *ctx; + struct i915_address_space *vm; int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); @@ -2117,12 +2118,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, switch (args->param) { case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - rcu_read_lock(); - if (rcu_access_pointer(ctx->vm)) - args->value = rcu_dereference(ctx->vm)->total; - else - args->value = to_i915(dev)->ggtt.vm.total; - rcu_read_unlock(); + vm = i915_gem_context_get_eb_vm(ctx); + args->value = vm->total; + i915_vm_put(vm); + break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: -- cgit v1.2.3-70-g09d2 From a82a9979de227ac45d513ecade54fc9478a4181b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:53 +0200 Subject: drm/i915: Add i915_gem_context_is_full_ppgtt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And use it anywhere we have open-coded checks for ctx->vm that really only check for full ppgtt. Plus for paranoia add a GEM_BUG_ON that checks it's really only set when we have full ppgtt, just in case. gem_context->vm is different since it's NULL in ggtt mode, unlike intel_context->vm or gt->vm, which is always set. v2: 0day found a testcase that I missed. v3: Repaint shed (Jon, Tvrtko) Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-7-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.h | 7 +++++++ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 6 +++--- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7a566fb7cca4..b8067b35d2d6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1566,7 +1566,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, int err; u32 id; - if (!rcu_access_pointer(ctx->vm)) + if (!i915_gem_context_has_full_ppgtt(ctx)) return -ENODEV; rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index da6e8b506d96..c50ded3d1cd3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -154,6 +154,13 @@ i915_gem_context_vm(struct i915_gem_context *ctx) return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); } +static inline bool i915_gem_context_has_full_ppgtt(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(!!rcu_access_pointer(ctx->vm) != HAS_FULL_PPGTT(ctx->i915)); + + return !!rcu_access_pointer(ctx->vm); +} + static inline struct i915_address_space * i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4816359c4c21..d2308145b4f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -733,7 +733,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return PTR_ERR(ctx); eb->gem_context = ctx; - if (rcu_access_pointer(ctx->vm)) + if (i915_gem_context_has_full_ppgtt(ctx)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index fc7fb33a3a52..1536c50144f8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -704,7 +704,7 @@ static int igt_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -838,7 +838,7 @@ static int igt_shared_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -1417,7 +1417,7 @@ static int igt_ctx_readonly(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), ce->engine->name, - yesno(!!ctx_vm(ctx)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); i915_gem_context_unlock_engines(ctx); goto out_file; -- cgit v1.2.3-70-g09d2 From 0483a301873309a285b2eccac723601006b990d7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:54 +0200 Subject: drm/i915: Use i915_gem_context_get_eb_vm in intel_context_set_gem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand Date: Thu Jul 8 10:48:30 2021 -0500 drm/i915/gem: Don't allow changing the VM on running contexts (v4) the gem_ctx->vm can't change anymore. Plus we always set the intel_context->vm, so might as well use the helper we have for that. This makes it very clear that we always overwrite intel_context->vm for userspace contexts, since the default is gt->vm, which is explicitly reserved for kernel context use. It would be good to split things up a bit further and avoid any possibility for an accident where we run kernel stuff in userspace vm or the other way round. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-8-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index b8067b35d2d6..48ee96bf2643 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -791,16 +791,8 @@ static int intel_context_set_gem(struct intel_context *ce, ce->ring_size = SZ_16K; - if (rcu_access_pointer(ctx->vm)) { - struct i915_address_space *vm; - - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); /* hmm */ - rcu_read_unlock(); - - i915_vm_put(ce->vm); - ce->vm = vm; - } + i915_vm_put(ce->vm); + ce->vm = i915_gem_context_get_eb_vm(ctx); if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine) && -- cgit v1.2.3-70-g09d2 From 9ec8795e7d91bc650db03dc6f5315667555dae11 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:55 +0200 Subject: drm/i915: Drop __rcu from gem_context->vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's been invariant since commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand Date: Thu Jul 8 10:48:30 2021 -0500 drm/i915/gem: Don't allow changing the VM on running contexts (v4) this just completes the deed. I've tried to split out prep work for more careful review as much as possible, this is what's left: - get_ppgtt gets simplified since we don't need to grab a temporary reference - we can rely on the temporary reference for the gem_ctx while we inspect the vm. The new vm_id still needs a full i915_vm_open ofc. This also removes the final caller of context_get_vm_rcu - A pile of selftests can now just look at ctx->vm instead of rcu_dereference_protected( , true) or similar things. - All callers of i915_gem_context_vm also disappear. - I've changed the hugepage selftest to set scrub_64K without any locking, because when we inspect that setting we're also not taking any locks either. It works because it's a selftests that's careful (single threaded gives you nice ordering) and not a live driver where races can happen from anywhere. These can only be split up further if we have some intermediate state with a bunch more rcu_dereference_protected(ctx->vm, true), just to shut up lockdep and sparse. The conversion to __rcu happened in commit a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Author: Chris Wilson Date: Fri Oct 4 14:40:09 2019 +0100 drm/i915: Move context management under GEM Note that we're not breaking the actual bugfix in there: The real bugfix is pushing the i915_vm_relase onto a separate worker, to avoid locking inversion issues. The rcu conversion was just thrown in for entertainment value on top (no vm lookup isn't even close to anything that's a hotpath where removing the single spinlock can be measured). v2: Rebase over the change to move the i915_vm_put() into i915_gem_context_release(). v3: Trivial conflict against repainted shed. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-9-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 55 +++------------------- drivers/gpu/drm/i915/gem/i915_gem_context.h | 8 ++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 4 +- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 24 ++++------ drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 7 files changed, 22 insertions(+), 75 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 48ee96bf2643..c2ab0e22db0a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -742,44 +742,6 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, return ret; } -static struct i915_address_space * -context_get_vm_rcu(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); - - do { - struct i915_address_space *vm; - - /* - * We do not allow downgrading from full-ppgtt [to a shared - * global gtt], so ctx->vm cannot become NULL. - */ - vm = rcu_dereference(ctx->vm); - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of ctx, - * we no longer care if it is released from ctx, as - * it cannot be reallocated elsewhere. - */ - - if (vm == rcu_access_pointer(ctx->vm)) - return rcu_pointer_handoff(vm); - - i915_vm_put(vm); - } while (1); -} - static int intel_context_set_gem(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_sseu sseu) @@ -990,7 +952,7 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) i915_vm_put(vm); @@ -1216,7 +1178,7 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) { /* i915_vm_close drops the final reference, which is a bit too * early and could result in surprises with concurrent @@ -1335,7 +1297,7 @@ i915_gem_create_context(struct drm_i915_private *i915, vm = &ppgtt->vm; } if (vm) { - RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); + ctx->vm = i915_vm_open(vm); /* i915_vm_open() takes a reference */ i915_vm_put(vm); @@ -1561,15 +1523,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (!i915_gem_context_has_full_ppgtt(ctx)) return -ENODEV; - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); - rcu_read_unlock(); - if (!vm) - return -ENODEV; + vm = ctx->vm; + GEM_BUG_ON(!vm); err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL); if (err) - goto err_put; + return err; i915_vm_open(vm); @@ -1577,8 +1536,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, args->value = id; args->size = 0; -err_put: - i915_vm_put(vm); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index c50ded3d1cd3..d3279086a5e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -156,9 +156,9 @@ i915_gem_context_vm(struct i915_gem_context *ctx) static inline bool i915_gem_context_has_full_ppgtt(struct i915_gem_context *ctx) { - GEM_BUG_ON(!!rcu_access_pointer(ctx->vm) != HAS_FULL_PPGTT(ctx->i915)); + GEM_BUG_ON(!!ctx->vm != HAS_FULL_PPGTT(ctx->i915)); - return !!rcu_access_pointer(ctx->vm); + return !!ctx->vm; } static inline struct i915_address_space * @@ -166,12 +166,10 @@ i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm; - rcu_read_lock(); - vm = rcu_dereference(ctx->vm); + vm = ctx->vm; if (!vm) vm = &ctx->i915->ggtt.vm; vm = i915_vm_get(vm); - rcu_read_unlock(); return vm; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 0c38789bd4a8..c4617e4d9fa9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -262,7 +262,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space __rcu *vm; + struct i915_address_space *vm; /** * @pid: process id of creator diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 6c68fe26bb32..5d71626a1ee5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1688,11 +1688,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) goto out_file; } - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) WRITE_ONCE(vm->scrub_64K, true); - mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 1536c50144f8..b32f7fed2d9c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -27,12 +27,6 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) -static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) -{ - /* single threaded, private ctx */ - return rcu_dereference_protected(ctx->vm, true); -} - static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -813,7 +807,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915, ctx_vm(parent)); + ctx = kernel_context(i915, parent->vm); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; @@ -823,7 +817,7 @@ static int igt_shared_ctx_exec(void *arg) GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(ctx_vm(parent), + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -1380,7 +1374,7 @@ static int igt_ctx_readonly(void *arg) goto out_file; } - vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; + vm = ctx->vm ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_file; @@ -1499,7 +1493,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err; @@ -1596,7 +1590,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err; @@ -1739,7 +1733,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) u32 *vaddr; int err = 0; - vm = ctx_vm(ctx); + vm = ctx->vm; if (!vm) return -ENODEV; @@ -1801,7 +1795,7 @@ static int igt_vm_isolation(void *arg) } /* We can only test vm isolation, if the vm are distinct */ - if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) + if (ctx_a->vm == ctx_b->vm) goto out_file; /* Read the initial state of the scratch page */ @@ -1813,8 +1807,8 @@ static int igt_vm_isolation(void *arg) if (err) goto out_file; - vm_total = ctx_vm(ctx_a)->total; - GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); + vm_total = ctx_a->vm->total; + GEM_BUG_ON(ctx_b->vm->total != vm_total); count = 0; num_engines = 0; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 806ad688274b..237e5061381b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -1246,7 +1246,7 @@ DECLARE_EVENT_CLASS(i915_context, TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->vm = rcu_access_pointer(ctx->vm); + __entry->vm = ctx->vm; ), TP_printk("dev=%u, ctx=%p, ctx_vm=%p", diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 79ba72da0813..1f10fe36619b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -39,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != rcu_access_pointer(ctx->vm)) { + if (vma->vm != ctx->vm) { pr_err("VMA created with wrong VM\n"); ok = false; } -- cgit v1.2.3-70-g09d2 From 843151521844af6c3e22d4bef42d292c04f05fa2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:56 +0200 Subject: drm/i915: use xa_lock/unlock for fpriv->vm_xa lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need the absolute speed of rcu for this. And i915_address_space in general dont need rcu protection anywhere else, after we've made gem contexts and engines a lot more immutable. Note that this semantically reverts commit aabbe344dc3ca5f7d8263a02608ba6179e8a4499 Author: Chris Wilson Date: Fri Aug 30 19:03:25 2019 +0100 drm/i915: Use RCU for unlocked vm_idr lookup except we have the conversion from idr to xarray in between. v2: kref_get_unless_zero is no longer required (Maarten) Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-10-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0558921663d9..94175a49c151 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1888,11 +1888,11 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_address_space *vm; - rcu_read_lock(); + xa_lock(&file_priv->vm_xa); vm = xa_load(&file_priv->vm_xa, id); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); + if (vm) + kref_get(&vm->ref); + xa_unlock(&file_priv->vm_xa); return vm; } -- cgit v1.2.3-70-g09d2 From dcc5d82063d9055cecd09bec4d280c5ab62b9d8f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:57 +0200 Subject: drm/i915: Stop rcu support for i915_address_space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The full audit is quite a bit of work: - i915_dpt has very simple lifetime (somehow we create a display pagetable vm per object, so its _very_ simple, there's only ever a single vma in there), and uses i915_vm_close(), which internally does a i915_vm_put(). No rcu. Aside: wtf is i915_dpt doing in the intel_display.c garbage collector as a new feature, instead of added as a separate file with some clean-ish interface. Also, i915_dpt unfortunately re-introduces some coding patterns from pre-dma_resv_lock conversion times. - i915_gem_proto_ctx is fully refcounted and no rcu, all protected by fpriv->proto_context_lock. - i915_gem_context is itself rcu protected, and that might leak to anything it points at. Before commit cf977e18610e66e48c31619e7e0cfa871be9eada Author: Chris Wilson Date: Wed Dec 2 11:21:40 2020 +0000 drm/i915/gem: Spring clean debugfs and commit db80a1294c231b6ac725085f046bb2931e00c9db Author: Chris Wilson Date: Mon Jan 18 11:08:54 2021 +0000 drm/i915/gem: Remove per-client stats from debugfs/i915_gem_objects we had a bunch of debugfs files that relied on rcu protecting everything, but those are gone now. The main one was removed even earlier with There doesn't seem to be anything left that's actually protecting stuff now that the ctx->vm itself is invariant. See commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand Date: Thu Jul 8 10:48:30 2021 -0500 drm/i915/gem: Don't allow changing the VM on running contexts (v4) Note that we drop the vm refcount before the final release of the gem context refcount, so this is all very dangerous even without rcu. Note that aside from later on creating new engines (a defunct feature) and debug output we're never looked at gem_ctx->vm for anything functional, hence why this is ok. Fingers crossed. Preceeding patches removed all vestiges of rcu use from gem_ctx->vm derferencing to make it clear it's really not used. The gem_ctx->rcu protection was introduced in commit a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Author: Chris Wilson Date: Fri Oct 4 14:40:09 2019 +0100 drm/i915: Move context management under GEM The commit message is somewhat entertaining because it fails to mention this fact completely, and compensates that by an in-commit changelog entry that claims that ctx->vm is protected by ctx->mutex. Which was the case _before_ this commit, but no longer after it. - intel_context holds a full reference. Unfortunately intel_context is also rcu protected and the reference to the ->vm is dropped before the rcu barrier - only the kfree is delayed. So again we need to check whether that leaks anywhere on the intel_context->vm. RCU is only used to protect intel_context sitting on the breadcrumb lists, which don't look at the vm anywhere, so we are fine. Nothing else relies on rcu protection of intel_context and hence is fully protected by the kref refcount alone, which protects intel_context->vm in turn. The breadcrumbs rcu usage was added in commit c744d50363b714783bbc88d986cc16def13710f7 Author: Chris Wilson Date: Thu Nov 26 14:04:06 2020 +0000 drm/i915/gt: Split the breadcrumb spinlock between global and contexts its parent commit added the intel_context rcu protection: commit 14d1eaf08845c534963c83f754afe0cb14cb2512 Author: Chris Wilson Date: Thu Nov 26 14:04:05 2020 +0000 drm/i915/gt: Protect context lifetime with RCU given some credence to my claim that I've actually caught them all. - drm_i915_gem_object's shares_resv_from pointer has a full refcount to the dma_resv, which is a sub-refcount that's released after the final i915_vm_put() has been called. Safe. Aside: Maybe we should have a struct dma_resv_shared which is just dma_resv + kref as a stand-alone thing. It's a pretty useful pattern which other drivers might want to copy. For a bit more context see commit 4d8151ae5329cf50781a02fd2298a909589a5bab Author: Thomas Hellström Date: Tue Jun 1 09:46:41 2021 +0200 drm/i915: Don't free shared locks while shared - the fpriv->vm_xa was relying on rcu_read_lock for lookup, but that was updated in a prep patch too to just be a spinlock-protected lookup. - intel_gt->vm is set at driver load in intel_gt_init() and released in intel_gt_driver_release(). There seems to be some issue that in some error paths this is called twice, but otherwise no rcu to be found anywhere. This was added in the below commit, which unfortunately doesn't explain why this complication exists. commit e6ba76480299a0d77c51d846f7467b1673aad25b Author: Chris Wilson Date: Sat Dec 21 16:03:24 2019 +0000 drm/i915: Remove i915->kernel_context The proper fix most likely for this is to start using drmm_ at large scale, but that's also huge amounts of work. - i915_vma->vm is some real pain, because rcu is rcu protected, at least in the vma lookup in the context lookup cache in eb_lookup_vma(). This was added in commit 4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 Author: Chris Wilson Date: Fri Jun 16 15:05:16 2017 +0100 drm/i915: Store a direct lookup from object handle to vma This was changed to a radix tree from the hashtable in, but with the locking unchanged, in commit d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc Author: Chris Wilson Date: Wed Aug 16 09:52:08 2017 +0100 drm/i915: Replace execbuf vma ht with an idr In commit 93159e12353c2a47e5576d642845a91fa00530bf Author: Chris Wilson Date: Mon Mar 23 09:28:41 2020 +0000 drm/i915/gem: Avoid gem_context->mutex for simple vma lookup the locking was changed from dev->struct_mutex to rcu, which added the requirement to rcu protect i915_vma. Somehow this was missed in review (or I'm completely blind). Irrespective of all that the vma lookup cache rcu_read_lock grabs a full reference of the vma and the rcu doesn't leak further. So no impact on i915_address_space from that. I have not found any other rcu use for i915_vma, but given that it seems broken I also didn't bother to do a careful in-depth audit. Alltogether there's nothing left in-tree anymore which requires that a pointer deref to an i915_address_space is safe undre rcu_read_lock only. rcu protection of i915_address_space was introduced in commit b32fa811156328aea5a3c2ff05cc096490382456 Author: Chris Wilson Date: Thu Jun 20 19:37:05 2019 +0100 drm/i915/gtt: Defer address space cleanup to an RCU worker by mixing up a bugfixing (i915_address_space needs to be released from a worker) with enabling rcu support. The commit message also seems somewhat confused, because it talks about cleanup of WC pages requiring sleep, while the code and linked bugzilla are about a requirement to take dev->struct_mutex (which yes sleeps but it's a much more specific problem). Since final kref_put can be called from pretty much anywhere (including hardirq context through the scheduler's i915_active cleanup) we need a worker here. Hence that part must be kept. Ideally all these reclaim workers should have some kind of integration with our shrinkers, but for some of these it's rather tricky. Anyway, that's a preexisting condition in the codeebase that we wont fix in this patch here. We also remove the rcu_barrier in ggtt_cleanup_hw added in commit 60a4233a4952729089e4df152e730f8f4d0e82ce Author: Chris Wilson Date: Mon Jul 29 14:24:12 2019 +0100 drm/i915: Flush the i915_vm_release before ggtt shutdown Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-11-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 - drivers/gpu/drm/i915/gt/intel_gtt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index de3ac58fceec..8d71f67926f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, 0); - rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ flush_workqueue(ggtt->vm.i915->wq); mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e137dd32b5b8..a0c2b952aa57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -155,7 +155,7 @@ void i915_vm_resv_release(struct kref *kref) static void __i915_vm_release(struct work_struct *work) { struct i915_address_space *vm = - container_of(work, struct i915_address_space, rcu.work); + container_of(work, struct i915_address_space, release_work); vm->cleanup(vm); i915_address_space_fini(vm); @@ -171,7 +171,7 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - queue_rcu_work(vm->i915->wq, &vm->rcu); + queue_work(vm->i915->wq, &vm->release_work); } void i915_address_space_init(struct i915_address_space *vm, int subclass) @@ -185,7 +185,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); - INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); /* diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index bc7153018ebd..5b539bd7645d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -213,7 +213,7 @@ struct i915_vma_ops { struct i915_address_space { struct kref ref; - struct rcu_work rcu; + struct work_struct release_work; struct drm_mm mm; struct intel_gt *gt; -- cgit v1.2.3-70-g09d2 From f5392e5f8ef300c5d8fb97fb441aad217e44f394 Mon Sep 17 00:00:00 2001 From: ravitejax Date: Fri, 3 Sep 2021 23:50:34 +0530 Subject: drm/i915/adl_s: Remove require_force_probe protection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing force probe protection from ADLS platform. Did not observe warnings, errors, flickering or any visual defects while doing ordinary tasks like browsing and editing documents in a two monitor setup. For more info drm-tip idle run results : https://intel-gfx-ci.01.org/tree/drm-tip/bat-all.html? Cc: Lucas De Marchi Signed-off-by: ravitejax Reviewed-by: Ayaz A Siddiqui Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210903182034.668467-1-ravitejax.gpud.talla@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f0e2f5e67bda..4745f7aad848 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -926,7 +926,6 @@ static const struct intel_device_info adl_s_info = { GEN12_FEATURES, PLATFORM(INTEL_ALDERLAKE_S), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), - .require_force_probe = 1, .display.has_hti = 1, .display.has_psr_hw_tracking = 0, .platform_engine_mask = -- cgit v1.2.3-70-g09d2 From 3f027d61663fc20622a9563ab1463fab17672289 Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Tue, 7 Sep 2021 22:46:39 +0530 Subject: drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL MOCS table of TGL/RKL has MOCS[1] set to L3_UC. While for other gen12 devices we need to set MOCS[1] as L3_WB, So adding a new MOCS table for other gen 12 devices eg. ADL. Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values") Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui Reviewed-by: Matt Roper [mattrope: fix whitespace error] Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210907171639.1221287-1-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 40 ++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index e96afd7beb49..f47521b8e941 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -315,6 +315,34 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry gen12_mocs_table[] = { + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -351,14 +379,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 1; table->unused_entries_index = 5; - } else if (GRAPHICS_VER(i915) >= 12) { + } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + /* For TGL/RKL, Can't be changed now for ABI reasons */ table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; - /* For TGL/RKL, Can't be changed now for ABI reasons */ - if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915)) - table->unused_entries_index = 2; + } else if (GRAPHICS_VER(i915) >= 12) { + table->size = ARRAY_SIZE(gen12_mocs_table); + table->table = gen12_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; -- cgit v1.2.3-70-g09d2 From 502d0609fc418e674f1e8bd30aa02748e4c6b465 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 3 Sep 2021 16:53:17 +0100 Subject: drm/i915/gtt: add some flushing for the 64K GTT path If we need to mark the PDE as operating in 64K GTT mode, we should be paranoid and flush the extra writes, like we already do for the PTEs. On some platforms the clflush can apparently add the just the right amount of magical delay to force the GPU to see the updated entry. Signed-off-by: Matthew Auld Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20210903155317.1854012-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 6e0e52eeb87a..6a5af995f5b1 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -548,6 +548,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + clflush_cache_range(vaddr, PAGE_SIZE); page_size = I915_GTT_PAGE_SIZE_64K; /* @@ -568,6 +569,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); + clflush_cache_range(vaddr, PAGE_SIZE); } } -- cgit v1.2.3-70-g09d2 From f503eb0cf2badfd8a70dac5d2a48a3e83550278e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 6 Sep 2021 10:17:29 +0100 Subject: drm/i915/selftests: fixup igt_shrink_thp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the object might still be active here, the shrink_all will simply ignore it, which blows up in the test, since the pages will still be there. Currently THP is disabled which should result in the test being skipped, but if we ever re-enable THP we might start seeing the failure. Fix this by forcing I915_SHRINK_ACTIVE. v2: Some machine in the shard runs doesn't seem to have any available swap when running this test. Try to handle this. Signed-off-by: Matthew Auld Cc: Tvrtko Ursulin Cc: Thomas Hellström Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210906091729.2093312-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 5d71626a1ee5..0827634c842c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1519,6 +1519,7 @@ static int igt_shrink_thp(void *arg) struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; + bool should_swap; int err = 0; /* @@ -1567,23 +1568,38 @@ static int igt_shrink_thp(void *arg) break; } i915_gem_context_unlock_engines(ctx); + /* + * Nuke everything *before* we unpin the pages so we can be reasonably + * sure that when later checking get_nr_swap_pages() that some random + * leftover object doesn't steal the remaining swap space. + */ + i915_gem_shrink(NULL, i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); i915_vma_unpin(vma); if (err) goto out_put; /* - * Now that the pages are *unpinned* shrink-all should invoke - * shmem to truncate our pages. + * Now that the pages are *unpinned* shrinking should invoke + * shmem to truncate our pages, if we have available swap. */ - i915_gem_shrink_all(i915); - if (i915_gem_object_has_pages(obj)) { - pr_err("shrink-all didn't truncate the pages\n"); + should_swap = get_nr_swap_pages() > 0; + i915_gem_shrink(NULL, i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + if (should_swap == i915_gem_object_has_pages(obj)) { + pr_err("unexpected pages mismatch, should_swap=%s\n", + yesno(should_swap)); err = -EINVAL; goto out_put; } - if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { - pr_err("residual page-size bits left\n"); + if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) { + pr_err("unexpected residual page-size bits, should_swap=%s\n", + yesno(should_swap)); err = -EINVAL; goto out_put; } -- cgit v1.2.3-70-g09d2 From 058d7d62602868fa430555311fa45dfda2168349 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Sep 2021 22:57:37 +0100 Subject: drm/i915: clean up inconsistent indenting There is a statement that is indented one character too deeply, clean this up. Signed-off-by: Colin Ian King Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210902215737.55570-1-colin.king@canonical.com --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de5f9c86b9a4..aeb324b701ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2565,7 +2565,7 @@ __execlists_context_pre_pin(struct intel_context *ce, if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { lrc_init_state(ce, engine, *vaddr); - __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); + __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); } return 0; -- cgit v1.2.3-70-g09d2 From 74388ca483a416a92cee69dcbeeb793d39199371 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 9 Sep 2021 12:44:48 +0100 Subject: drm/i915: Use Transparent Hugepages when IOMMU is enabled Usage of Transparent Hugepages was disabled in 9987da4b5dcf ("drm/i915: Disable THP until we have a GPU read BW W/A"), but since it appears majority of performance regressions reported with an enabled IOMMU can be almost eliminated by turning them on, lets just do that. To err on the side of safety we keep the current default in cases where IOMMU is not active, and only when it is default to the "huge=within_size" mode. Although there probably would be wins to enable them throughout, more extensive testing across benchmarks and platforms would need to be done. With the patch and IOMMU enabled my local testing on a small Skylake part shows OglVSTangent regression being reduced from ~14% (IOMMU on versus IOMMU off) to ~2% (same comparison but with THP on). More detailed testing done in the below referenced Gitlab issue by Eero: Skylake GT4e: Performance drops from enabling IOMMU: 30-35% SynMark CSDof 20-25% Unigine Heaven, MemBW GPU write, SynMark VSTangent ~20% GLB Egypt (1/2 screen window) 10-15% GLB T-Rex (1/2 screen window) 8-10% GfxBench T-Rex, MemBW GPU blit 7-8% SynMark DeferredAA + TerrainFly* + ZBuffer 6-7% GfxBench Manhattan 3.0 + 3.1, SynMark TexMem128 & CSCloth 5-6% GfxBench CarChase, Unigine Valley 3-5% GfxBench Vulkan & GL AztecRuins + ALU2, MemBW GPU texture, SynMark Fill*, Deferred, TerrainPan* 1-2% Most of the other tests With the patch drops become: 20-25% SynMark TexMem* 15-20% GLB Egypt (1/2 screen window) 10-15% GLB T-Rex (1/2 screen window) 4-7% GfxBench T-Rex, GpuTest Triangle 1-8% GfxBench ALU2 (offscreen 1%, onscreen 8%) 3% GfxBench Manhattan 3.0, SynMark CSDof 2-3% Unigine Heaven + Valley, MemBW GPU texture 1-3 GfxBench Manhattan 3.1 + CarChase + Vulkan & GL AztecRuins Broxton: Performance drops from IOMMU, without patch: 30% MemBW GPU write 25% SynMark ZBuffer + Fill* 20% MemBW GPU blit 15% MemBW GPU blend, GpuTest Triangle 10-15% MemBW GPU texture 10% GLB Egypt, Unigine Heaven (had hangs), SynMark TerrainFly* 7-9% GLB T-Rex, GfxBench Manhattan 3.0 + T-Rex, SynMark Deferred* + TexMem* 6-8% GfxBench CarChase, Unigine Valley, SynMark CSCloth + ShMapVsm + TerrainPan* 5-6% GfxBench Manhattan 3.1 + GL AztecRuins, SynMark CSDof + TexFilterTri 2-4% GfxBench ALU2, SynMark DrvRes + GSCloth + ShMapPcf + Batch[0-5] + TexFilterAniso, GpuTest GiMark + 32-bit Julia And with patch: 15-20% MemBW GPU texture 10% SynMark TexMem* 8-9% GLB Egypt (1/2 screen window) 4-5% GLB T-Rex (1/2 screen window) 3-6% GfxBench Manhattan 3.0, GpuTest FurMark, SynMark Deferred + TexFilterTri 3-4% GfxBench Manhattan 3.1 + T-Rex, SynMark VSInstancing 2-4% GpuTest Triangle, SynMark DeferredAA 2-3% Unigine Heaven + Valley 1-3% SynMark Terrain* 1-2% GfxBench CarChase, SynMark TexFilterAniso + ZBuffer Tigerlake-H: 20-25% MemBW GPU texture 15-20% GpuTest Triangle 13-15% SynMark TerrainFly* + DeferredAA + HdrBloom 8-10% GfxBench Manhattan 3.1, SynMark TerrainPan* + DrvRes 6-7% GfxBench Manhattan 3.0, SynMark TexMem* 4-8% GLB onscreen Fill + T-Rex + Egypt (more in onscreen than offscreen versions of T-Rex/Egypt) 4-6% GfxBench CarChase + GLES AztecRuins + ALU2, GpuTest 32-bit Julia, SynMark CSDof + DrvState 3-5% GfxBench T-Rex + Egypt, Unigine Heaven + Valley, GpuTest Plot3D 1-7% Media tests 2-3% MemBW GPU blit 1-3% Most of the rest of 3D tests With the patch: 6-8% MemBW GPU blend => the only regression in these tests (compared to IOMMU without THP) 4-6% SynMark DrvState (not impacted) + HdrBloom (improved) 3-4% GLB T-Rex ~3% GLB Egypt, SynMark DrvRes 1-3% GfxBench T-Rex + Egypt, SynMark TexFilterTri 1-2% GfxBench CarChase + GLES AztecRuins, Unigine Valley, GpuTest Triangle ~1% GfxBench Manhattan 3.0/3.1, Unigine Heaven Perf of several tests actually improved with IOMMU + THP, compared to no IOMMU / no THP: 10-15% SynMark Batch[0-3] 5-10% MemBW GPU texture, SynMark ShMapVsm 3-4% SynMark Fill* + Geom* 2-3% SynMark TexMem512 + CSCloth 1-2% SynMark TexMem128 + DeferredAA As a summary across all platforms, these are the benchmarks where enabling THP on top of IOMMU enabled brings regressions: * Skylake GT4e: 20-25% SynMark TexMem* (whereas all MemBW GPU tests either improve or are not affected) * Broxton J4205: 7% MemBW GPU texture 2-3% SynMark TexMem* * Tigerlake-H: 7% MemBW GPU blend Other benchmarks show either lowering of regressions or improvements. v2: * Add Kconfig dependency to transparent hugepages and some help text. * Move to helper for easier handling of kernel build options. v3: * Drop Kconfig. (Daniel) v4: * Add some benchmark results to commit message. v5: * Add explicit regression summary to commit message. (Eero) References: b901bb89324a ("drm/i915/gemfs: enable THP") References: 9987da4b5dcf ("drm/i915: Disable THP until we have a GPU read BW W/A") References: https://gitlab.freedesktop.org/drm/intel/-/issues/430 Co-developed-by: Chris Wilson Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Eero Tamminen Cc: Tvrtko Ursulin Cc: Rodrigo Vivi Cc: Daniel Vetter Signed-off-by: Tvrtko Ursulin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210909114448.508493-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gemfs.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 5e6e8c91ab38..dbdbdc344d87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gemfs.h" @@ -15,6 +14,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) { struct file_system_type *type; struct vfsmount *gemfs; + char *opts; type = get_fs_type("tmpfs"); if (!type) @@ -26,10 +26,26 @@ int i915_gemfs_init(struct drm_i915_private *i915) * * One example, although it is probably better with a per-file * control, is selecting huge page allocations ("huge=within_size"). - * Currently unused due to bandwidth issues (slow reads) on Broadwell+. + * However, we only do so to offset the overhead of iommu lookups + * due to bandwidth issues (slow reads) on Broadwell+. */ - gemfs = kern_mount(type); + opts = NULL; + if (intel_vtd_active()) { + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + static char huge_opt[] = "huge=within_size"; /* r/w */ + + opts = huge_opt; + drm_info(&i915->drm, + "Transparent Hugepage mode '%s'\n", + opts); + } else { + drm_notice(&i915->drm, + "Transparent Hugepage support is recommended for optimal performance when IOMMU is enabled!\n"); + } + } + + gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, opts); if (IS_ERR(gemfs)) return PTR_ERR(gemfs); -- cgit v1.2.3-70-g09d2 From f25e3908b9cd4a3fe819e9bdcdde58f20bacb34c Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 7 Sep 2021 16:27:04 -0700 Subject: drm/i915: Get PM ref before accessing HW register Seeing these errors when GT is likely in suspend state- "RPM wakelock ref not held during HW access" Ensure GT is awake before trying to access HW registers. Avoid reading the register if that is not the case. Signed-off-by: Vinay Belgaumkar Fixes: 41e5c17ebfc2 ("drm/i915/guc/slpc: Sysfs hooks for SLPC") Reviewed-by: Tvrtko Ursulin Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210907232704.12982-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 3489f5f0cac1..e1a198bbd135 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1969,8 +1969,14 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; - return intel_uncore_read(uncore, GEN6_RPNSWREQ); + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); + + return freq; } static u32 intel_rps_get_req(u32 pureq) -- cgit v1.2.3-70-g09d2 From fc30a6764a54dea42291aeb7009bef7aa2fc1cd4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:22 -0700 Subject: drm/i915/guc: Fix blocked context accounting Prior to this patch the blocked context counter was cleared on init_sched_state (used during registering a context & resets) which is incorrect. This state needs to be persistent or the counter can read the incorrect value resulting in scheduling never getting enabled again. Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Reviewed-by: Daniel Vetter Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 87d8dc8f51b9..69faa39da178 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -152,7 +152,7 @@ static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ atomic_set(&ce->guc_sched_state_no_lock, 0); - ce->guc_state.sched_state = 0; + ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } static inline bool -- cgit v1.2.3-70-g09d2 From 669b949c1a44d0cb2bcd18ff6ab4fd0c21e7cf6f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:23 -0700 Subject: drm/i915/guc: Fix outstanding G2H accounting A small race that could result in incorrect accounting of the number of outstanding G2H. Basically prior to this patch we did not increment the number of outstanding G2H if we encoutered a GT reset while sending a H2G. This was incorrect as the context state had already been updated to anticipate a G2H response thus the counter should be incremented. As part of this change we remove a legacy (now unused) path that was the last caller requiring a G2H response that was not guaranteed to loop. This allows us to simplify the accounting as we don't need to handle the case where the send fails due to the channel being busy. Also always use helper when decrementing this value. v2 (Daniele): update GEM_BUG_ON check, pull in dead code removal from later patch, remove loop param from context_deregister. Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer") Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Cc: Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 79 +++++++++++------------ 1 file changed, 37 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 69faa39da178..aff5dd247a88 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -352,20 +352,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_unlock_irqrestore(&guc->context_lookup, flags); } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + static int guc_submission_send_busy_loop(struct intel_guc *guc, const u32 *action, u32 len, u32 g2h_len_dw, bool loop) { - int err; - - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + /* + * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), + * so we don't handle the case where we don't get a reply because we + * aborted the send due to the channel being busy. + */ + GEM_BUG_ON(g2h_len_dw && !loop); - if (!err && g2h_len_dw) + if (g2h_len_dw) atomic_inc(&guc->outstanding_submission_g2h); - return err; + return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); } int intel_guc_wait_for_pending_msg(struct intel_guc *guc, @@ -616,7 +625,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) init_sched_state(ce); if (pending_enable || destroyed || deregister) { - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); if (deregister) guc_signal_context_fence(ce); if (destroyed) { @@ -635,7 +644,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) intel_engine_signal_breadcrumbs(ce->engine); } intel_context_sched_disable_unpin(ce); - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); spin_lock_irqsave(&ce->guc_state.lock, flags); guc_blocked_fence_complete(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1233,8 +1242,7 @@ static int register_context(struct intel_context *ce, bool loop) } static int __guc_action_deregister_context(struct intel_guc *guc, - u32 guc_id, - bool loop) + u32 guc_id) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_CONTEXT, @@ -1243,16 +1251,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_DEREGISTER_CONTEXT, - loop); + true); } -static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) +static int deregister_context(struct intel_context *ce, u32 guc_id) { struct intel_guc *guc = ce_to_guc(ce); trace_intel_context_deregister(ce); - return __guc_action_deregister_context(guc, guc_id, loop); + return __guc_action_deregister_context(guc, guc_id); } static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) @@ -1340,26 +1348,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * registering this context. */ if (context_registered) { + bool disabled; + unsigned long flags; + trace_intel_context_steal_guc_id(ce); - if (!loop) { + GEM_BUG_ON(!loop); + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { set_context_wait_for_deregister_to_register(ce); intel_context_get(ce); - } else { - bool disabled; - unsigned long flags; - - /* Seal race with Reset */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - disabled = submission_disabled(guc); - if (likely(!disabled)) { - set_context_wait_for_deregister_to_register(ce); - intel_context_get(ce); - } - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(disabled)) { - reset_lrc_desc(guc, desc_idx); - return 0; /* Will get registered later */ - } + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ } /* @@ -1367,13 +1372,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * context whose guc_id was stolen. */ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id, loop); - if (unlikely(ret == -EBUSY)) { - clr_context_wait_for_deregister_to_register(ce); - intel_context_put(ce); - } else if (unlikely(ret == -ENODEV)) { + ret = deregister_context(ce, ce->guc_id); + if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ - } } else { with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); @@ -1730,7 +1731,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(context_enabled(ce)); clr_context_registered(ce); - deregister_context(ce, ce->guc_id, true); + deregister_context(ce, ce->guc_id); } static void __guc_context_destroy(struct intel_context *ce) @@ -2583,12 +2584,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return ce; } -static void decr_outstanding_submission_g2h(struct intel_guc *guc) -{ - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) - wake_up_all(&guc->ct.wq); -} - int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) -- cgit v1.2.3-70-g09d2 From c39f51cc980dd918c5b3da61d54c4725785e766e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:24 -0700 Subject: drm/i915/guc: Unwind context requests in reverse order When unwinding requests on a reset context, if other requests in the context are in the priority list the requests could be resubmitted out of seqno order. Traverse the list of active requests in reverse and append to the head of the priority list to fix this. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index aff5dd247a88..0c1e6b465fba 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -806,9 +806,9 @@ __unwind_incomplete_requests(struct intel_context *ce) spin_lock_irqsave(&sched_engine->lock, flags); spin_lock(&ce->guc_active.lock); - list_for_each_entry_safe(rq, rn, - &ce->guc_active.requests, - sched.link) { + list_for_each_entry_safe_reverse(rq, rn, + &ce->guc_active.requests, + sched.link) { if (i915_request_completed(rq)) continue; @@ -825,7 +825,7 @@ __unwind_incomplete_requests(struct intel_context *ce) } GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); - list_add_tail(&rq->sched.link, pl); + list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); spin_lock(&ce->guc_active.lock); -- cgit v1.2.3-70-g09d2 From 88209a8ecb8b8752322908a3c3362a001bdc3a39 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:25 -0700 Subject: drm/i915/guc: Don't drop ce->guc_active.lock when unwinding context Don't drop ce->guc_active.lock when unwinding a context after reset. At one point we had to drop this because of a lock inversion but that is no longer the case. It is much safer to hold the lock so let's do that. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-5-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0c1e6b465fba..31bbfe5479ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -813,8 +813,6 @@ __unwind_incomplete_requests(struct intel_context *ce) continue; list_del_init(&rq->sched.link); - spin_unlock(&ce->guc_active.lock); - __i915_request_unsubmit(rq); /* Push the request back into the queue for later resubmission. */ @@ -827,8 +825,6 @@ __unwind_incomplete_requests(struct intel_context *ce) list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - spin_lock(&ce->guc_active.lock); } spin_unlock(&ce->guc_active.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); -- cgit v1.2.3-70-g09d2 From d67e3d5a5da8ddcad7fcfac6a2a521128e4304af Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:26 -0700 Subject: drm/i915/guc: Process all G2H message at once in work queue Rather than processing 1 G2H at a time and re-queuing the work queue if more messages exist, process all the G2H in a single pass of the work queue. Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Cc: Daniel Vetter Cc: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 22b4733b55e2..20c710a74498 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1042,9 +1042,9 @@ static void ct_incoming_request_worker_func(struct work_struct *w) container_of(w, struct intel_guc_ct, requests.worker); bool done; - done = ct_process_incoming_requests(ct); - if (!done) - queue_work(system_unbound_wq, &ct->requests.worker); + do { + done = ct_process_incoming_requests(ct); + } while (!done); } static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) -- cgit v1.2.3-70-g09d2 From 1ca36cff0166b0483fe3b99e711e9c800ebbfaa4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:27 -0700 Subject: drm/i915/guc: Workaround reset G2H is received after schedule done G2H If the context is reset as a result of the request cancellation the context reset G2H is received after schedule disable done G2H which is the wrong order. The schedule disable done G2H release the waiting request cancellation code which resubmits the context. This races with the context reset G2H which also wants to resubmit the context but in this case it really should be a NOP as request cancellation code owns the resubmit. Use some clever tricks of checking the context state to seal this race until the GuC firmware is fixed. v2: (Checkpatch) - Fix typos v3: (Daniele) - State that is a bug in the GuC firmware Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Cc: Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-7-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 41 +++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 31bbfe5479ae..f9e3725b94c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -833,17 +833,33 @@ __unwind_incomplete_requests(struct intel_context *ce) static void __guc_reset_context(struct intel_context *ce, bool stalled) { struct i915_request *rq; + unsigned long flags; u32 head; + bool skip = false; intel_context_get(ce); /* - * GuC will implicitly mark the context as non-schedulable - * when it sends the reset notification. Make sure our state - * reflects this change. The context will be marked enabled - * on resubmission. + * GuC will implicitly mark the context as non-schedulable when it sends + * the reset notification. Make sure our state reflects this change. The + * context will be marked enabled on resubmission. + * + * XXX: If the context is reset as a result of the request cancellation + * this G2H is received after the schedule disable complete G2H which is + * wrong as this creates a race between the request cancellation code + * re-submitting the context and this G2H handler. This is a bug in the + * GuC but can be worked around in the meantime but converting this to a + * NOP if a pending enable is in flight as this indicates that a request + * cancellation has occurred. */ - clr_context_enabled(ce); + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (likely(!context_pending_enable(ce))) + clr_context_enabled(ce); + else + skip = true; + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(skip)) + goto out_put; rq = intel_context_find_active_request(ce); if (!rq) { @@ -862,6 +878,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) out_replay: guc_reset_state(ce, head, stalled); __unwind_incomplete_requests(ce); +out_put: intel_context_put(ce); } @@ -1598,6 +1615,13 @@ static void guc_context_cancel_request(struct intel_context *ce, guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), true); } + + /* + * XXX: Racey if context is reset, see comment in + * __guc_reset_context(). + */ + flush_work(&ce_to_guc(ce)->ct.requests.worker); + guc_context_unblock(ce); } } @@ -2712,7 +2736,12 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); - if (likely(!intel_context_is_banned(ce))) { + /* + * XXX: Racey if request cancellation has occurred, see comment in + * __guc_reset_context(). + */ + if (likely(!intel_context_is_banned(ce) && + !context_blocked(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); } -- cgit v1.2.3-70-g09d2 From ac653dd7996edf1770959e11a078312928bd7315 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:28 -0700 Subject: Revert "drm/i915/gt: Propagate change in error status to children on unhold" Propagating errors to dependent fences is broken and can lead to errors from one client ending up in another. In commit 3761baae908a ("Revert "drm/i915: Propagate errors on awaiting already signaled fences""), we attempted to get rid of fence error propagation but missed the case added in commit 8e9f84cf5cac ("drm/i915/gt: Propagate change in error status to children on unhold"). Revert that one too. This error was found by an up-and-coming selftest which triggers a reset during request cancellation and verifies that subsequent requests complete successfully. v2: (Daniel Vetter) - Use revert v3: (Jason) - Update commit message v4 (Daniele): - fix checkpatch error in commit message. References: '3761baae908a ("Revert "drm/i915: Propagate errors on awaiting already signaled fences"")' Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Daniel Vetter Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-8-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index aeb324b701ec..87c595e4efe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2140,10 +2140,6 @@ static void __execlists_unhold(struct i915_request *rq) if (p->flags & I915_DEPENDENCY_WEAK) continue; - /* Propagate any change in error status */ - if (rq->fence.error) - i915_request_set_error_once(w, rq->fence.error); - if (w->engine != rq->engine) continue; -- cgit v1.2.3-70-g09d2 From cf37e5c820f16972bd806e06632eb83e7a152d60 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:29 -0700 Subject: drm/i915/guc: Kick tasklet after queuing a request Kick tasklet after queuing a request so it submitted in a timely manner. Fixes: 3a4cdf1982f0 ("drm/i915/guc: Implement GuC context operations for new inteface") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-9-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f9e3725b94c1..bd401a5be87c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1049,6 +1049,7 @@ static inline void queue_request(struct i915_sched_engine *sched_engine, list_add_tail(&rq->sched.link, i915_sched_lookup_priolist(sched_engine, prio)); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + tasklet_hi_schedule(&sched_engine->tasklet); } static int guc_bypass_tasklet_submit(struct intel_guc *guc, -- cgit v1.2.3-70-g09d2 From 9888beaaf118b6878347e1fe2b369fc66d756d18 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:30 -0700 Subject: drm/i915/guc: Don't enable scheduling on a banned context, guc_id invalid, not registered When unblocking a context, do not enable scheduling if the context is banned, guc_id invalid, or not registered. v2: (Daniele) - Add helper for unblock Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-10-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index bd401a5be87c..f5bee833ee7d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -148,6 +148,7 @@ static inline void clr_context_registered(struct intel_context *ce) #define SCHED_STATE_BLOCKED_SHIFT 4 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) + static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ @@ -1563,6 +1564,23 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) return &ce->guc_blocked; } +#define SCHED_STATE_MULTI_BLOCKED_MASK \ + (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) +#define SCHED_STATE_NO_UNBLOCK \ + (SCHED_STATE_MULTI_BLOCKED_MASK | \ + SCHED_STATE_PENDING_DISABLE | \ + SCHED_STATE_BANNED) + +static bool context_cant_unblock(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || + context_guc_id_invalid(ce) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id) || + !intel_context_is_pinned(ce); +} + static void guc_context_unblock(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); @@ -1577,9 +1595,7 @@ static void guc_context_unblock(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); if (unlikely(submission_disabled(guc) || - !intel_context_is_pinned(ce) || - context_pending_disable(ce) || - context_blocked(ce) > 1)) { + context_cant_unblock(ce))) { enable = false; } else { enable = true; -- cgit v1.2.3-70-g09d2 From d135865cb8e396c0cc1d7d52dbb980fde39da641 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:31 -0700 Subject: drm/i915/guc: Copy whole golden context, set engine state size of subset When the GuC does a media reset, it copies a golden context state back into the corrupted context's state. The address of the golden context and the size of the engine state restore are passed in via the GuC ADS. The i915 had a bug where it passed in the whole size of the golden context, not the size of the engine state to restore resulting in a memory corruption. Also copy the entire golden context on init rather than just the engine state that is restored. v2 (Daniele): use defines to avoid duplicated const variables (John). Fixes: 481d458caede ("drm/i915/guc: Add golden context to GuC ADS") Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-11-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 6926919bcac6..2c6ea64af7ec 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -349,6 +349,8 @@ static void fill_engine_enable_masks(struct intel_gt *gt, info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); } +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) static int guc_prep_golden_context(struct intel_guc *guc, struct __guc_ads_blob *blob) { @@ -396,7 +398,18 @@ static int guc_prep_golden_context(struct intel_guc *guc, if (!blob) continue; - blob->ads.eng_state_size[guc_class] = real_size; + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists context. This is required to + * allow the GuC to restore just the engine state when a + * watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). + */ + blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE; blob->ads.golden_context_lrca[guc_class] = addr_ggtt; addr_ggtt += alloc_size; } @@ -436,11 +449,6 @@ static void guc_init_golden_context(struct intel_guc *guc) u8 engine_class, guc_class; u8 *ptr; - /* Skip execlist and PPGTT registers + HWSP */ - const u32 lr_hw_context_size = 80 * sizeof(u32); - const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + - lr_hw_context_size; - if (!intel_uc_uses_guc_submission(>->uc)) return; @@ -476,12 +484,12 @@ static void guc_init_golden_context(struct intel_guc *guc) continue; } - GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != + real_size - LRC_SKIP_SIZE); GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); addr_ggtt += alloc_size; - shmem_read(engine->default_state, skip_size, ptr + skip_size, - real_size - skip_size); + shmem_read(engine->default_state, 0, ptr, real_size); ptr += alloc_size; } -- cgit v1.2.3-70-g09d2 From d2420c2ed8f1bae5f36f681aad73b3d4c5a57d39 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:32 -0700 Subject: drm/i915/selftests: Add initial GuC selftest for scrubbing lost G2H While debugging an issue with full GT resets I went down a rabbit hole thinking the scrubbing of lost G2H wasn't working correctly. This proved to be incorrect as this was working just fine but this chase inspired me to write a selftest to prove that this works. This simple selftest injects errors dropping various G2H and then issues a full GT reset proving that the scrubbing of these G2H doesn't blow up. v2: (Daniel Vetter) - Use ifdef instead of macros for selftests v3: (Checkpatch) - A space after 'switch' statement v4: (Daniele) - A comment saying GT won't idle if G2H are lost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-12-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 25 ++++ drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 127 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + .../drm/i915/selftests/intel_scheduler_helpers.c | 12 ++ .../drm/i915/selftests/intel_scheduler_helpers.h | 2 + 6 files changed, 185 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc.c (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3a73f3117873 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -198,6 +198,24 @@ struct intel_context { */ u8 guc_prio; u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** + * @drop_schedule_enable: Force drop of schedule enable G2H for selftest + */ + bool drop_schedule_enable; + + /** + * @drop_schedule_disable: Force drop of schedule disable G2H for + * selftest + */ + bool drop_schedule_disable; + + /** + * @drop_deregister: Force drop of deregister G2H for selftest + */ + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f5bee833ee7d..5970c54a2e72 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2639,6 +2639,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; @@ -2693,10 +2700,24 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -3073,3 +3094,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index 000000000000..fb0e4a7bd8ca --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, + struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(&rq->submit, + &from->fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_guc_scrub_ctbs(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + int i; + struct i915_request *last[3] = {NULL, NULL, NULL}, *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine; + struct intel_context *ce; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + engine = intel_selftest_find_any_engine(gt); + + /* Submit requests and inject errors forcing G2H to be dropped */ + for (i = 0; i < 3; ++i) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + pr_err("Failed to create context, %d: %d\n", i, ret); + goto err; + } + + switch (i) { + case 0: + ce->drop_schedule_enable = true; + break; + case 1: + ce->drop_schedule_disable = true; + break; + case 2: + ce->drop_deregister = true; + break; + } + + rq = nop_user_request(ce, NULL); + intel_context_put(ce); + + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed to create request, %d: %d\n", i, ret); + goto err; + } + + last[i] = rq; + } + + for (i = 0; i < 3; ++i) { + ret = i915_request_wait(last[i], 0, HZ); + if (ret < 0) { + pr_err("Last request failed to complete: %d\n", ret); + goto err; + } + i915_request_put(last[i]); + last[i] = NULL; + } + + /* Force all H2G / G2H to be submitted / processed */ + intel_gt_retire_requests(gt); + msleep(500); + + /* Scrub missing G2H */ + intel_gt_handle_error(engine->gt, -1, 0, "selftest reset"); + + /* GT will not idle if G2H are lost */ + ret = intel_gt_wait_for_idle(gt, HZ); + if (ret < 0) { + pr_err("GT failed to idle: %d\n", ret); + goto err; + } + +err: + for (i = 0; i < 3; ++i) + if (last[i]) + i915_request_put(last[i]); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return ret; +} + +int intel_guc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_guc_scrub_ctbs), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index cfa5c4165a4f..3cf6758931f9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -47,5 +47,6 @@ selftest(execlists, intel_execlists_live_selftests) selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) selftest(slpc, intel_slpc_live_selftests) +selftest(guc, intel_guc_live_selftests) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index 4b328346b48a..310fb83c527e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -14,6 +14,18 @@ #define REDUCED_PREEMPT 10 #define WAIT_FOR_RESET_TIME 10000 +struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + return engine; + + pr_err("No valid engine found!\n"); + return NULL; +} + int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, u32 modify_type) diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h index 35c098601ac0..ae60bb507f45 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -10,6 +10,7 @@ struct i915_request; struct intel_engine_cs; +struct intel_gt; struct intel_selftest_saved_policy { u32 flags; @@ -23,6 +24,7 @@ enum selftest_scheduler_modify { SELFTEST_SCHEDULER_MODIFY_FAST_RESET, }; +struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt); int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, enum selftest_scheduler_modify modify_type); -- cgit v1.2.3-70-g09d2 From 422cda4f50091bdfa114c7d19fce31919c920fe1 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:33 -0700 Subject: drm/i915/guc: Take context ref when cancelling request A context can get destroyed after cancelling a request, if a context or GT reset occurs, so take a reference to context when cancelling a request. Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-13-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 5970c54a2e72..e036a171ff17 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1624,8 +1624,10 @@ static void guc_context_cancel_request(struct intel_context *ce, struct i915_request *rq) { if (i915_sw_fence_signaled(&rq->submit)) { - struct i915_sw_fence *fence = guc_context_block(ce); + struct i915_sw_fence *fence; + intel_context_get(ce); + fence = guc_context_block(ce); i915_sw_fence_wait(fence); if (!i915_request_completed(rq)) { __i915_request_skip(rq); @@ -1640,6 +1642,7 @@ static void guc_context_cancel_request(struct intel_context *ce, flush_work(&ce_to_guc(ce)->ct.requests.worker); guc_context_unblock(ce); + intel_context_put(ce); } } -- cgit v1.2.3-70-g09d2 From f16d5cb981a557c1a32bc43ef28b5dc254f7239c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:34 -0700 Subject: drm/i915/guc: Don't touch guc_state.sched_state without a lock Before we did some clever tricks to not use the a lock when touching guc_state.sched_state in certain cases. Don't do that, enforce the use of the lock. v2: (kernel test robo ) - Add __maybe_unused to sched_state_is_init() v3: rebase after the unused code path removal has been moved to an earlier patch. Signed-off-by: Matthew Brost Reported-by: kernel test robot Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-14-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e036a171ff17..ca73128d7b4d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -151,11 +151,23 @@ static inline void clr_context_registered(struct intel_context *ce) static inline void init_sched_state(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() */ + lockdep_assert_held(&ce->guc_state.lock); atomic_set(&ce->guc_sched_state_no_lock, 0); ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } +__maybe_unused +static bool sched_state_is_init(struct intel_context *ce) +{ + /* + * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after + * suspend. + */ + return !(atomic_read(&ce->guc_sched_state_no_lock) & + ~SCHED_STATE_NO_LOCK_REGISTERED) && + !(ce->guc_state.sched_state &= ~SCHED_STATE_BLOCKED_MASK); +} + static inline bool context_wait_for_deregister_to_register(struct intel_context *ce) { @@ -166,7 +178,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce) static inline void set_context_wait_for_deregister_to_register(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() without lock */ + lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state |= SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; } @@ -607,9 +619,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) bool pending_disable, pending_enable, deregister, destroyed, banned; xa_for_each(&guc->context_lookup, index, ce) { - /* Flush context */ spin_lock_irqsave(&ce->guc_state.lock, flags); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * Once we are at this point submission_disabled() is guaranteed @@ -625,6 +635,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) banned = context_banned(ce); init_sched_state(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (pending_enable || destroyed || deregister) { decr_outstanding_submission_g2h(guc); if (deregister) @@ -1324,6 +1336,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) int ret = 0; GEM_BUG_ON(!engine->mask); + GEM_BUG_ON(!sched_state_is_init(ce)); /* * Ensure LRC + CT vmas are is same region as write barrier is done @@ -1352,7 +1365,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->priority = ce->guc_prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); - init_sched_state(ce); /* * The context_lookup xarray is used to determine if the hardware -- cgit v1.2.3-70-g09d2 From ae36b62927f1cfe81095641d6279cbf23fb64b2a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:35 -0700 Subject: drm/i915/guc: Reset LRC descriptor if register returns -ENODEV Reset LRC descriptor if a context register returns -ENODEV as this means we are mid-reset. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-15-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ca73128d7b4d..dcd7a09f8559 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1405,10 +1405,12 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) } else { with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); - if (unlikely(ret == -EBUSY)) + if (unlikely(ret == -EBUSY)) { + reset_lrc_desc(guc, desc_idx); + } else if (unlikely(ret == -ENODEV)) { reset_lrc_desc(guc, desc_idx); - else if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ + } } return ret; -- cgit v1.2.3-70-g09d2 From b0d83888a32b30cb95bee7385151ac58d51a2340 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:36 -0700 Subject: drm/i915/guc: Release submit fence from an irq_work A subsequent patch will flip the locking hierarchy from ce->guc_state.lock -> sched_engine->lock to sched_engine->lock -> ce->guc_state.lock. As such we need to release the submit fence for a request from an IRQ to break a lock inversion - i.e. the fence must be release went holding ce->guc_state.lock and the releasing of the can acquire sched_engine->lock. v2: (Daniele) - Delete request from list before calling irq_work_queue Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-16-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 +++++++++++++++++++--- drivers/gpu/drm/i915/i915_request.h | 5 +++++ 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index dcd7a09f8559..4b7ccf9730a3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2049,17 +2049,32 @@ static const struct intel_context_ops guc_context_ops = { .create_virtual = guc_create_virtual, }; +static void submit_work_cb(struct irq_work *wrk) +{ + struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); + + might_lock(&rq->engine->sched_engine->lock); + i915_sw_fence_complete(&rq->submit); +} + static void __guc_signal_context_fence(struct intel_context *ce) { - struct i915_request *rq; + struct i915_request *rq, *rn; lockdep_assert_held(&ce->guc_state.lock); if (!list_empty(&ce->guc_state.fences)) trace_intel_context_fence_release(ce); - list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) - i915_sw_fence_complete(&rq->submit); + /* + * Use an IRQ to ensure locking order of sched_engine->lock -> + * ce->guc_state.lock is preserved. + */ + list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, + guc_fence_link) { + list_del(&rq->guc_fence_link); + irq_work_queue(&rq->submit_work); + } INIT_LIST_HEAD(&ce->guc_state.fences); } @@ -2169,6 +2184,7 @@ out: spin_lock_irqsave(&ce->guc_state.lock, flags); if (context_wait_for_deregister_to_register(ce) || context_pending_disable(ce)) { + init_irq_work(&rq->submit_work, submit_work_cb); i915_sw_fence_await(&rq->submit); list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 1bc1349ba3c2..d818cfbfc41d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -218,6 +218,11 @@ struct i915_request { }; struct llist_head execute_cb; struct i915_sw_fence semaphore; + /** + * @submit_work: complete submit fence from an IRQ if needed for + * locking hierarchy reasons. + */ + struct irq_work submit_work; /* * A list of everyone we wait upon, and everyone who waits upon us. -- cgit v1.2.3-70-g09d2 From 52d66c06fd9412e9738330b0502b4b89bf079405 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:37 -0700 Subject: drm/i915/guc: Move guc_blocked fence to struct guc_state Move guc_blocked fence to struct guc_state as the lock which protects the fence lives there. s/ce->guc_blocked/ce->guc_state.blocked/g v2: (Daniele) - s/blocked_fence/blocked/g Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-17-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 5 +++-- drivers/gpu/drm/i915/gt/intel_context_types.h | 5 ++--- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 18 +++++++++--------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 745e84c72c90..3048267ddc7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -405,8 +405,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) * Initialize fence to be complete as this is expected to be complete * unless there is a pending schedule disable outstanding. */ - i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_init(&ce->guc_state.blocked, + sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_state.blocked); i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 3a73f3117873..5aecb9038b5b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -167,6 +167,8 @@ struct intel_context { * fence related to GuC submission */ struct list_head fences; + /* GuC context blocked fence */ + struct i915_sw_fence blocked; } guc_state; struct { @@ -190,9 +192,6 @@ struct intel_context { */ struct list_head guc_id_link; - /* GuC context blocked fence */ - struct i915_sw_fence guc_blocked; - /* * GuC priority management */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 4b7ccf9730a3..99f223114331 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1502,24 +1502,24 @@ static void guc_blocked_fence_complete(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - if (!i915_sw_fence_done(&ce->guc_blocked)) - i915_sw_fence_complete(&ce->guc_blocked); + if (!i915_sw_fence_done(&ce->guc_state.blocked)) + i915_sw_fence_complete(&ce->guc_state.blocked); } static void guc_blocked_fence_reinit(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); /* * This fence is always complete unless a pending schedule disable is * outstanding. We arm the fence here and complete it when we receive * the pending schedule disable complete message. */ - i915_sw_fence_fini(&ce->guc_blocked); - i915_sw_fence_reinit(&ce->guc_blocked); - i915_sw_fence_await(&ce->guc_blocked); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_fini(&ce->guc_state.blocked); + i915_sw_fence_reinit(&ce->guc_state.blocked); + i915_sw_fence_await(&ce->guc_state.blocked); + i915_sw_fence_commit(&ce->guc_state.blocked); } static u16 prep_context_pending_disable(struct intel_context *ce) @@ -1559,7 +1559,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) if (enabled) clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return &ce->guc_blocked; + return &ce->guc_state.blocked; } /* @@ -1575,7 +1575,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_sched_disable(guc, ce, guc_id); - return &ce->guc_blocked; + return &ce->guc_state.blocked; } #define SCHED_STATE_MULTI_BLOCKED_MASK \ -- cgit v1.2.3-70-g09d2 From 0f7976506de615abfcc54e2469417c69ff2b030f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:38 -0700 Subject: drm/i915/guc: Rework and simplify locking Rework and simplify the locking with GuC subission. Drop sched_state_no_lock and move all fields under the guc_state.sched_state and protect all these fields with guc_state.lock . This requires changing the locking hierarchy from guc_state.lock -> sched_engine.lock to sched_engine.lock -> guc_state.lock. v2: (Daniele) - Don't check fields outside of lock during sched disable, check less fields within lock as some of the outside are no longer needed Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-18-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 5 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 199 ++++++++++------------ drivers/gpu/drm/i915/i915_trace.h | 6 +- 3 files changed, 90 insertions(+), 120 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 5aecb9038b5b..d2f798ef678c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -161,7 +161,7 @@ struct intel_context { * sched_state: scheduling state of this context using GuC * submission */ - u16 sched_state; + u32 sched_state; /* * fences: maintains of list of requests that have a submit * fence related to GuC submission @@ -178,9 +178,6 @@ struct intel_context { struct list_head requests; } guc_active; - /* GuC scheduling state flags that do not require a lock. */ - atomic_t guc_sched_state_no_lock; - /* GuC LRC descriptor ID */ u16 guc_id; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 99f223114331..383dc1017004 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -72,87 +72,24 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); #define GUC_REQUEST_SIZE 64 /* bytes */ -/* - * Below is a set of functions which control the GuC scheduling state which do - * not require a lock as all state transitions are mutually exclusive. i.e. It - * is not possible for the context pinning code and submission, for the same - * context, to be executing simultaneously. We still need an atomic as it is - * possible for some of the bits to changing at the same time though. - */ -#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) -#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) -#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) -static inline bool context_enabled(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_ENABLED); -} - -static inline void set_context_enabled(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_enabled(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_pending_enable(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_PENDING_ENABLE); -} - -static inline void set_context_pending_enable(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_pending_enable(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_registered(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_REGISTERED); -} - -static inline void set_context_registered(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_registered(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - /* * Below is a set of functions which control the GuC scheduling state which - * require a lock, aside from the special case where the functions are called - * from guc_lrc_desc_pin(). In that case it isn't possible for any other code - * path to be executing on the context. + * require a lock. */ #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) #define SCHED_STATE_DESTROYED BIT(1) #define SCHED_STATE_PENDING_DISABLE BIT(2) #define SCHED_STATE_BANNED BIT(3) -#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_ENABLED BIT(4) +#define SCHED_STATE_PENDING_ENABLE BIT(5) +#define SCHED_STATE_REGISTERED BIT(6) +#define SCHED_STATE_BLOCKED_SHIFT 7 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) static inline void init_sched_state(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - atomic_set(&ce->guc_sched_state_no_lock, 0); ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } @@ -163,9 +100,8 @@ static bool sched_state_is_init(struct intel_context *ce) * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after * suspend. */ - return !(atomic_read(&ce->guc_sched_state_no_lock) & - ~SCHED_STATE_NO_LOCK_REGISTERED) && - !(ce->guc_state.sched_state &= ~SCHED_STATE_BLOCKED_MASK); + return !(ce->guc_state.sched_state &= + ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); } static inline bool @@ -238,6 +174,57 @@ static inline void clr_context_banned(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; } +static inline bool context_enabled(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_ENABLED; +} + +static inline void set_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_ENABLED; +} + +static inline void clr_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; +} + +static inline bool context_pending_enable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; +} + +static inline void set_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; +} + +static inline bool context_registered(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; +} + +static inline void set_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -246,7 +233,6 @@ static inline u32 context_blocked(struct intel_context *ce) static inline void incr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state += SCHED_STATE_BLOCKED; @@ -256,7 +242,6 @@ static inline void incr_context_blocked(struct intel_context *ce) static inline void decr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ @@ -452,6 +437,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) u32 g2h_len_dw = 0; bool enabled; + lockdep_assert_held(&rq->engine->sched_engine->lock); + /* * Corner case where requests were sitting in the priority list or a * request resubmitted after the context was banned. @@ -459,7 +446,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(intel_context_is_banned(ce))) { i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(ce->engine); - goto out; + return 0; } GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); @@ -472,9 +459,11 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { err = guc_lrc_desc_pin(ce, false); if (unlikely(err)) - goto out; + return err; } + spin_lock(&ce->guc_state.lock); + /* * The request / context will be run on the hardware when scheduling * gets enabled in the unblock. @@ -509,6 +498,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) trace_i915_request_guc_submit(rq); out: + spin_unlock(&ce->guc_state.lock); return err; } @@ -747,6 +737,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) wait_for_reset(guc, &guc->outstanding_submission_g2h); } while (!list_empty(&guc->ct.requests.incoming)); } + scrub_guc_desc_for_outstanding_g2h(guc); } @@ -1147,7 +1138,11 @@ static int steal_guc_id(struct intel_guc *guc) list_del_init(&ce->guc_id_link); guc_id = ce->guc_id; + + spin_lock(&ce->guc_state.lock); clr_context_registered(ce); + spin_unlock(&ce->guc_state.lock); + set_context_guc_id_invalid(ce); return guc_id; } else { @@ -1183,6 +1178,8 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) try_again: spin_lock_irqsave(&guc->contexts_lock, flags); + might_lock(&ce->guc_state.lock); + if (context_guc_id_invalid(ce)) { ret = assign_guc_id(guc, &ce->guc_id); if (ret) @@ -1262,8 +1259,13 @@ static int register_context(struct intel_context *ce, bool loop) trace_intel_context_register(ce); ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); - if (likely(!ret)) + if (likely(!ret)) { + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } return ret; } @@ -1537,7 +1539,6 @@ static u16 prep_context_pending_disable(struct intel_context *ce) static struct i915_sw_fence *guc_context_block(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; @@ -1546,13 +1547,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); - /* - * Sync with submission path, increment before below changes to context - * state. - */ - spin_lock(&sched_engine->lock); incr_context_blocked(ce); - spin_unlock(&sched_engine->lock); enabled = context_enabled(ce); if (unlikely(!enabled || submission_disabled(guc))) { @@ -1598,7 +1593,6 @@ static bool context_cant_unblock(struct intel_context *ce) static void guc_context_unblock(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; @@ -1618,13 +1612,7 @@ static void guc_context_unblock(struct intel_context *ce) intel_context_get(ce); } - /* - * Sync with submission path, decrement after above changes to context - * state. - */ - spin_lock(&sched_engine->lock); decr_context_blocked(ce); - spin_unlock(&sched_engine->lock); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1730,16 +1718,6 @@ static void guc_context_sched_disable(struct intel_context *ce) struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; u16 guc_id; - bool enabled; - - if (submission_disabled(guc) || context_guc_id_invalid(ce) || - !lrc_desc_registered(guc, ce->guc_id)) { - clr_context_enabled(ce); - goto unpin; - } - - if (!context_enabled(ce)) - goto unpin; spin_lock_irqsave(&ce->guc_state.lock, flags); @@ -1753,10 +1731,8 @@ static void guc_context_sched_disable(struct intel_context *ce) * sleep) ensures another process doesn't pin this context and generate * a request before we set the 'context_pending_disable' flag here. */ - enabled = context_enabled(ce); - if (unlikely(!enabled || submission_disabled(guc))) { - if (enabled) - clr_context_enabled(ce); + if (unlikely(!context_enabled(ce) || submission_disabled(guc))) { + clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); goto unpin; } @@ -1784,7 +1760,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); GEM_BUG_ON(context_enabled(ce)); - clr_context_registered(ce); deregister_context(ce, ce->guc_id); } @@ -1857,8 +1832,10 @@ static void guc_context_destroy(struct kref *kref) /* Seal race with Reset */ spin_lock_irqsave(&ce->guc_state.lock, flags); disabled = submission_disabled(guc); - if (likely(!disabled)) + if (likely(!disabled)) { set_context_destroyed(ce); + clr_context_registered(ce); + } spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (unlikely(disabled)) { release_guc_id(guc, ce); @@ -2724,8 +2701,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, (!context_pending_enable(ce) && !context_pending_disable(ce)))) { drm_err(&guc_to_gt(guc)->i915->drm, - "Bad context sched_state 0x%x, 0x%x, desc_idx %u", - atomic_read(&ce->guc_sched_state_no_lock), + "Bad context sched_state 0x%x, desc_idx %u", ce->guc_state.sched_state, desc_idx); return -EPROTO; } @@ -2740,7 +2716,9 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, } #endif + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_pending_enable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); } else if (context_pending_disable(ce)) { bool banned; @@ -3014,9 +2992,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", atomic_read(&ce->guc_id_ref)); - drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", - ce->guc_state.sched_state, - atomic_read(&ce->guc_sched_state_no_lock)); + drm_printf(p, "\t\tSchedule State: 0x%x\n\n", + ce->guc_state.sched_state); guc_log_context_priority(p, ce); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 237e5061381b..510168c1d703 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -903,7 +903,6 @@ DECLARE_EVENT_CLASS(intel_context, __field(u32, guc_id) __field(int, pin_count) __field(u32, sched_state) - __field(u32, guc_sched_state_no_lock) __field(u8, guc_prio) ), @@ -911,15 +910,12 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_id = ce->guc_id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_sched_state_no_lock = - atomic_read(&ce->guc_sched_state_no_lock); __entry->guc_prio = ce->guc_prio; ), - TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", __entry->guc_id, __entry->pin_count, __entry->sched_state, - __entry->guc_sched_state_no_lock, __entry->guc_prio) ); -- cgit v1.2.3-70-g09d2 From 1424ba81a2d056008adebab21bf633c420235e3c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:39 -0700 Subject: drm/i915/guc: Proper xarray usage for contexts_lookup Lock the xarray and take ref to the context if needed. v2: (Checkpatch) - Add new line after declaration (Daniel Vetter) - Correct put / get accounting in xa_for_loops v3: (Checkpatch) - Extra new line Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-19-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 102 ++++++++++++++++++---- 1 file changed, 87 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 383dc1017004..4814acf1c2ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -608,8 +608,18 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) unsigned long index, flags; bool pending_disable, pending_enable, deregister, destroyed, banned; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - spin_lock_irqsave(&ce->guc_state.lock, flags); + /* + * Corner case where the ref count on the object is zero but and + * deregister G2H was lost. In this case we don't touch the ref + * count and finish the destroy of the context. + */ + bool do_put = kref_get_unless_zero(&ce->ref); + + xa_unlock(&guc->context_lookup); + + spin_lock(&ce->guc_state.lock); /* * Once we are at this point submission_disabled() is guaranteed @@ -625,7 +635,9 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) banned = context_banned(ce); init_sched_state(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock(&ce->guc_state.lock); + + GEM_BUG_ON(!do_put && !destroyed); if (pending_enable || destroyed || deregister) { decr_outstanding_submission_g2h(guc); @@ -648,13 +660,19 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) } intel_context_sched_disable_unpin(ce); decr_outstanding_submission_g2h(guc); - spin_lock_irqsave(&ce->guc_state.lock, flags); + + spin_lock(&ce->guc_state.lock); guc_blocked_fence_complete(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock(&ce->guc_state.lock); intel_context_put(ce); } + + if (do_put) + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static inline bool @@ -890,16 +908,29 @@ void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) { struct intel_context *ce; unsigned long index; + unsigned long flags; if (unlikely(!guc_submission_initialized(guc))) { /* Reset called during driver load? GuC not yet initialised! */ return; } - xa_for_each(&guc->context_lookup, index, ce) + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); + if (intel_context_is_pinned(ce)) __guc_reset_context(ce, stalled); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + /* GuC is blown away, drop all references to contexts */ xa_destroy(&guc->context_lookup); } @@ -974,11 +1005,24 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) { struct intel_context *ce; unsigned long index; + unsigned long flags; + + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); - xa_for_each(&guc->context_lookup, index, ce) if (intel_context_is_pinned(ce)) guc_cancel_context_requests(ce); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + guc_cancel_sched_engine_requests(guc->sched_engine); /* GuC is blown away, drop all references to contexts */ @@ -2866,21 +2910,28 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) struct intel_context *ce; struct i915_request *rq; unsigned long index; + unsigned long flags; /* Reset called during driver load? GuC not yet initialised! */ if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { @@ -2890,9 +2941,16 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) intel_engine_set_hung_context(engine, ce); /* Can only cope with one hang at a time... */ - return; + intel_context_put(ce); + xa_lock(&guc->context_lookup); + goto done; } +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } +done: + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_dump_active_requests(struct intel_engine_cs *engine, @@ -2908,23 +2966,34 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } - spin_lock_irqsave(&ce->guc_active.lock, flags); + spin_lock(&ce->guc_active.lock); intel_engine_dump_active_requests(&ce->guc_active.requests, hung_rq, m); - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock(&ce->guc_active.lock); + +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_submission_print_info(struct intel_guc *guc, @@ -2978,7 +3047,9 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, { struct intel_context *ce; unsigned long index; + unsigned long flags; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); @@ -2997,6 +3068,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static struct intel_context * -- cgit v1.2.3-70-g09d2 From 5b116c17e6babc6de2e26714bc66228c74038b71 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:40 -0700 Subject: drm/i915/guc: Drop pin count check trick between sched_disable and re-pin Drop pin count check trick between a sched_disable and re-pin, now rely on the lock and counter of the number of committed requests to determine if scheduling should be disabled on the context. Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-20-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 53 ++++++++++++++--------- 2 files changed, 34 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index d2f798ef678c..3a5d98e908f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -169,6 +169,8 @@ struct intel_context { struct list_head fences; /* GuC context blocked fence */ struct i915_sw_fence blocked; + /* GuC committed requests */ + int number_committed_requests; } guc_state; struct { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 4814acf1c2ac..31f911ae14fa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -249,6 +249,25 @@ static inline void decr_context_blocked(struct intel_context *ce) ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } +static inline bool context_has_committed_requests(struct intel_context *ce) +{ + return !!ce->guc_state.number_committed_requests; +} + +static inline void incr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ++ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + +static inline void decr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + --ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id == GUC_INVALID_LRC_ID; @@ -1766,24 +1785,18 @@ static void guc_context_sched_disable(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); /* - * We have to check if the context has been disabled by another thread. - * We also have to check if the context has been pinned again as another - * pin operation is allowed to pass this function. Checking the pin - * count, within ce->guc_state.lock, synchronizes this function with - * guc_request_alloc ensuring a request doesn't slip through the - * 'context_pending_disable' fence. Checking within the spin lock (can't - * sleep) ensures another process doesn't pin this context and generate - * a request before we set the 'context_pending_disable' flag here. + * We have to check if the context has been disabled by another thread, + * check if submssion has been disabled to seal a race with reset and + * finally check if any more requests have been committed to the + * context ensursing that a request doesn't slip through the + * 'context_pending_disable' fence. */ - if (unlikely(!context_enabled(ce) || submission_disabled(guc))) { + if (unlikely(!context_enabled(ce) || submission_disabled(guc) || + context_has_committed_requests(ce))) { clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); goto unpin; } - if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return; - } guc_id = prep_context_pending_disable(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1813,6 +1826,7 @@ static void __guc_context_destroy(struct intel_context *ce) ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); intel_context_fini(ce); @@ -2043,6 +2057,10 @@ static void remove_from_context(struct i915_request *rq) spin_unlock_irq(&ce->guc_active.lock); + spin_lock_irq(&ce->guc_state.lock); + decr_context_committed_requests(ce); + spin_unlock_irq(&ce->guc_state.lock); + atomic_dec(&ce->guc_id_ref); i915_request_notify_execute_cb_imm(rq); } @@ -2193,15 +2211,7 @@ out: * schedule enable or context registration if either G2H is pending * respectfully. Once a G2H returns, the fence is released that is * blocking these requests (see guc_signal_context_fence). - * - * We can safely check the below fields outside of the lock as it isn't - * possible for these fields to transition from being clear to set but - * converse is possible, hence the need for the check within the lock. */ - if (likely(!context_wait_for_deregister_to_register(ce) && - !context_pending_disable(ce))) - return 0; - spin_lock_irqsave(&ce->guc_state.lock, flags); if (context_wait_for_deregister_to_register(ce) || context_pending_disable(ce)) { @@ -2210,6 +2220,7 @@ out: list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); } + incr_context_committed_requests(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); return 0; -- cgit v1.2.3-70-g09d2 From 9798b1724ba43f19deb44d2aa729af0e1cf4cd0d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:41 -0700 Subject: drm/i915/guc: Move GuC priority fields in context under guc_active Move GuC management fields in context under guc_active struct as this is where the lock that protects theses fields lives. Also only set guc_prio field once during context init. v2: (Daniele) - set CONTEXT_SET_INIT Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-21-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 12 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 +++++++++++++---------- drivers/gpu/drm/i915/i915_trace.h | 2 +- 3 files changed, 46 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 3a5d98e908f4..b56960a781da 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -112,6 +112,7 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 +#define CONTEXT_GUC_INIT 10 struct { u64 timeout_us; @@ -178,6 +179,11 @@ struct intel_context { spinlock_t lock; /** requests: active requests on this context */ struct list_head requests; + /* + * GuC priority management + */ + u8 prio; + u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_active; /* GuC LRC descriptor ID */ @@ -191,12 +197,6 @@ struct intel_context { */ struct list_head guc_id_link; - /* - * GuC priority management - */ - u8 guc_prio; - u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; - #ifdef CONFIG_DRM_I915_SELFTEST /** * @drop_schedule_enable: Force drop of schedule enable G2H for selftest diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 31f911ae14fa..7e9ee7d5aaab 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1385,8 +1385,6 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } -static inline u8 map_i915_prio_to_guc_prio(int prio); - static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; @@ -1394,8 +1392,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct intel_guc *guc = &engine->gt->uc.guc; u32 desc_idx = ce->guc_id; struct guc_lrc_desc *desc; - const struct i915_gem_context *ctx; - int prio = I915_CONTEXT_DEFAULT_PRIORITY; bool context_registered; intel_wakeref_t wakeref; int ret = 0; @@ -1412,12 +1408,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) context_registered = lrc_desc_registered(guc, desc_idx); - rcu_read_lock(); - ctx = rcu_dereference(ce->gem_context); - if (ctx) - prio = ctx->sched.priority; - rcu_read_unlock(); - reset_lrc_desc(guc, desc_idx); set_lrc_desc_registered(guc, desc_idx, ce); @@ -1426,8 +1416,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - ce->guc_prio = map_i915_prio_to_guc_prio(prio); - desc->priority = ce->guc_prio; + desc->priority = ce->guc_active.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); @@ -1822,10 +1811,10 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) static void __guc_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); @@ -1935,14 +1924,17 @@ static void guc_context_set_prio(struct intel_guc *guc, GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); + lockdep_assert_held(&ce->guc_active.lock); - if (ce->guc_prio == prio || submission_disabled(guc) || - !context_registered(ce)) + if (ce->guc_active.prio == prio || submission_disabled(guc) || + !context_registered(ce)) { + ce->guc_active.prio = prio; return; + } guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_prio = prio; + ce->guc_active.prio = prio; trace_intel_context_set_prio(ce); } @@ -1962,24 +1954,24 @@ static inline void add_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); - ++ce->guc_prio_count[guc_prio]; + ++ce->guc_active.prio_count[guc_prio]; /* Overflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); } static inline void sub_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); /* Underflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); - --ce->guc_prio_count[guc_prio]; + --ce->guc_active.prio_count[guc_prio]; } static inline void update_context_prio(struct intel_context *ce) @@ -1992,8 +1984,8 @@ static inline void update_context_prio(struct intel_context *ce) lockdep_assert_held(&ce->guc_active.lock); - for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { - if (ce->guc_prio_count[i]) { + for (i = 0; i < ARRAY_SIZE(ce->guc_active.prio_count); ++i) { + if (ce->guc_active.prio_count[i]) { guc_context_set_prio(guc, ce, i); break; } @@ -2135,6 +2127,21 @@ static bool context_needs_register(struct intel_context *ce, bool new_guc_id) !submission_disabled(ce_to_guc(ce)); } +static void guc_context_init(struct intel_context *ce) +{ + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + + ce->guc_active.prio = map_i915_prio_to_guc_prio(prio); + set_bit(CONTEXT_GUC_INIT, &ce->flags); +} + static int guc_request_alloc(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -2166,6 +2173,9 @@ static int guc_request_alloc(struct i915_request *rq) rq->reserved_space -= GUC_REQUEST_SIZE; + if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) + guc_context_init(ce); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the @@ -3042,13 +3052,12 @@ static inline void guc_log_context_priority(struct drm_printer *p, { int i; - drm_printf(p, "\t\tPriority: %d\n", - ce->guc_prio); + drm_printf(p, "\t\tPriority: %d\n", ce->guc_active.prio); drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; i < GUC_CLIENT_PRIORITY_NUM; ++i) { drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", - i, ce->guc_prio_count[i]); + i, ce->guc_active.prio_count[i]); } drm_printf(p, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 510168c1d703..181c5831405b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -910,7 +910,7 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_id = ce->guc_id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_prio = ce->guc_prio; + __entry->guc_prio = ce->guc_active.prio; ), TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", -- cgit v1.2.3-70-g09d2 From 3cb3e3434b9f9c34e98605658818b72fdaef0795 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:42 -0700 Subject: drm/i915/guc: Move fields protected by guc->contexts_lock into sub structure To make ownership of locking clear move fields (guc_id, guc_id_ref, guc_id_link) to sub structure guc_id in intel_context. Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-22-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 ++-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 104 +++++++++++----------- drivers/gpu/drm/i915/i915_trace.h | 4 +- 5 files changed, 69 insertions(+), 67 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 3048267ddc7e..485460a11331 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -398,8 +398,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_active.lock); INIT_LIST_HEAD(&ce->guc_active.requests); - ce->guc_id = GUC_INVALID_LRC_ID; - INIT_LIST_HEAD(&ce->guc_id_link); + ce->guc_id.id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id.link); /* * Initialize fence to be complete as this is expected to be complete diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index b56960a781da..0b00d249c884 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -186,16 +186,18 @@ struct intel_context { u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_active; - /* GuC LRC descriptor ID */ - u16 guc_id; + struct { + /* GuC LRC descriptor ID */ + u16 id; - /* GuC LRC descriptor reference count */ - atomic_t guc_id_ref; + /* GuC LRC descriptor reference count */ + atomic_t ref; - /* - * GuC ID link - in list when unpinned but guc_id still valid in GuC - */ - struct list_head guc_id_link; + /* + * GuC ID link - in list when unpinned but guc_id still valid in GuC + */ + struct list_head link; + } guc_id; #ifdef CONFIG_DRM_I915_SELFTEST /** diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 8be23e0f9306..7e6fdabac599 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -789,7 +789,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } skip: @@ -1098,7 +1098,7 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } count++; @@ -1108,7 +1108,7 @@ static int __igt_reset_engines(struct intel_gt *gt, pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id); + rq->fence.seqno, rq->context->guc_id.id); i915_request_put(rq); GEM_TRACE_DUMP(); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7e9ee7d5aaab..2c9a6b4ce071 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -270,12 +270,12 @@ static inline void decr_context_committed_requests(struct intel_context *ce) static inline bool context_guc_id_invalid(struct intel_context *ce) { - return ce->guc_id == GUC_INVALID_LRC_ID; + return ce->guc_id.id == GUC_INVALID_LRC_ID; } static inline void set_context_guc_id_invalid(struct intel_context *ce) { - ce->guc_id = GUC_INVALID_LRC_ID; + ce->guc_id.id = GUC_INVALID_LRC_ID; } static inline struct intel_guc *ce_to_guc(struct intel_context *ce) @@ -468,14 +468,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) return 0; } - GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); /* * Corner case where the GuC firmware was blown away and reloaded while * this context was pinned. */ - if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { + if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id))) { err = guc_lrc_desc_pin(ce, false); if (unlikely(err)) return err; @@ -494,14 +494,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; action[len++] = GUC_CONTEXT_ENABLE; set_context_pending_enable(ce); intel_context_get(ce); g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; } else { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; } err = intel_guc_send_nb(guc, action, len, g2h_len_dw); @@ -1167,12 +1167,12 @@ static int new_guc_id(struct intel_guc *guc) static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) { if (!context_guc_id_invalid(ce)) { - ida_simple_remove(&guc->guc_ids, ce->guc_id); - reset_lrc_desc(guc, ce->guc_id); + ida_simple_remove(&guc->guc_ids, ce->guc_id.id); + reset_lrc_desc(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); } static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) @@ -1194,13 +1194,13 @@ static int steal_guc_id(struct intel_guc *guc) if (!list_empty(&guc->guc_id_list)) { ce = list_first_entry(&guc->guc_id_list, struct intel_context, - guc_id_link); + guc_id.link); - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); - list_del_init(&ce->guc_id_link); - guc_id = ce->guc_id; + list_del_init(&ce->guc_id.link); + guc_id = ce->guc_id.id; spin_lock(&ce->guc_state.lock); clr_context_registered(ce); @@ -1236,7 +1236,7 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) int ret = 0; unsigned long flags, tries = PIN_GUC_ID_TRIES; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); try_again: spin_lock_irqsave(&guc->contexts_lock, flags); @@ -1244,20 +1244,20 @@ try_again: might_lock(&ce->guc_state.lock); if (context_guc_id_invalid(ce)) { - ret = assign_guc_id(guc, &ce->guc_id); + ret = assign_guc_id(guc, &ce->guc_id.id); if (ret) goto out_unlock; ret = 1; /* Indidcates newly assigned guc_id */ } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); - atomic_inc(&ce->guc_id_ref); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); + atomic_inc(&ce->guc_id.ref); out_unlock: spin_unlock_irqrestore(&guc->contexts_lock, flags); /* - * -EAGAIN indicates no guc_ids are available, let's retire any + * -EAGAIN indicates no guc_id are available, let's retire any * outstanding requests to see if that frees up a guc_id. If the first * retire didn't help, insert a sleep with the timeslice duration before * attempting to retire more requests. Double the sleep period each @@ -1285,15 +1285,15 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) { unsigned long flags; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); if (unlikely(context_guc_id_invalid(ce))) return; spin_lock_irqsave(&guc->contexts_lock, flags); - if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && - !atomic_read(&ce->guc_id_ref)) - list_add_tail(&ce->guc_id_link, &guc->guc_id_list); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && + !atomic_read(&ce->guc_id.ref)) + list_add_tail(&ce->guc_id.link, &guc->guc_id_list); spin_unlock_irqrestore(&guc->contexts_lock, flags); } @@ -1316,12 +1316,12 @@ static int register_context(struct intel_context *ce, bool loop) { struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + - ce->guc_id * sizeof(struct guc_lrc_desc); + ce->guc_id.id * sizeof(struct guc_lrc_desc); int ret; trace_intel_context_register(ce); - ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); + ret = __guc_action_register_context(guc, ce->guc_id.id, offset, loop); if (likely(!ret)) { unsigned long flags; @@ -1390,7 +1390,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct intel_engine_cs *engine = ce->engine; struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; struct intel_guc *guc = &engine->gt->uc.guc; - u32 desc_idx = ce->guc_id; + u32 desc_idx = ce->guc_id.id; struct guc_lrc_desc *desc; bool context_registered; intel_wakeref_t wakeref; @@ -1453,7 +1453,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * context whose guc_id was stolen. */ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id); + ret = deregister_context(ce, ce->guc_id.id); if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ } else { @@ -1524,7 +1524,7 @@ static void __guc_context_sched_enable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - ce->guc_id, + ce->guc_id.id, GUC_CONTEXT_ENABLE }; @@ -1540,7 +1540,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - guc_id, /* ce->guc_id not stable */ + guc_id, /* ce->guc_id.id not stable */ GUC_CONTEXT_DISABLE }; @@ -1585,7 +1585,7 @@ static u16 prep_context_pending_disable(struct intel_context *ce) guc_blocked_fence_reinit(ce); intel_context_get(ce); - return ce->guc_id; + return ce->guc_id.id; } static struct i915_sw_fence *guc_context_block(struct intel_context *ce) @@ -1638,7 +1638,7 @@ static bool context_cant_unblock(struct intel_context *ce) return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || context_guc_id_invalid(ce) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || !intel_context_is_pinned(ce); } @@ -1757,7 +1757,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) if (!context_guc_id_invalid(ce)) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_set_preemption_timeout(guc, - ce->guc_id, + ce->guc_id.id, 1); spin_unlock_irqrestore(&ce->guc_state.lock, flags); } @@ -1802,11 +1802,11 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); - GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); GEM_BUG_ON(context_enabled(ce)); - deregister_context(ce, ce->guc_id); + deregister_context(ce, ce->guc_id.id); } static void __guc_context_destroy(struct intel_context *ce) @@ -1851,7 +1851,7 @@ static void guc_context_destroy(struct kref *kref) __guc_context_destroy(ce); return; } else if (submission_disabled(guc) || - !lrc_desc_registered(guc, ce->guc_id)) { + !lrc_desc_registered(guc, ce->guc_id.id)) { release_guc_id(guc, ce); __guc_context_destroy(ce); return; @@ -1860,10 +1860,10 @@ static void guc_context_destroy(struct kref *kref) /* * We have to acquire the context spinlock and check guc_id again, if it * is valid it hasn't been stolen and needs to be deregistered. We - * delete this context from the list of unpinned guc_ids available to + * delete this context from the list of unpinned guc_id available to * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB * returns indicating this context has been deregistered the guc_id is - * returned to the pool of available guc_ids. + * returned to the pool of available guc_id. */ spin_lock_irqsave(&guc->contexts_lock, flags); if (context_guc_id_invalid(ce)) { @@ -1872,8 +1872,8 @@ static void guc_context_destroy(struct kref *kref) return; } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); spin_unlock_irqrestore(&guc->contexts_lock, flags); /* Seal race with Reset */ @@ -1918,7 +1918,7 @@ static void guc_context_set_prio(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, - ce->guc_id, + ce->guc_id.id, prio, }; @@ -2053,7 +2053,7 @@ static void remove_from_context(struct i915_request *rq) decr_context_committed_requests(ce); spin_unlock_irq(&ce->guc_state.lock); - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); i915_request_notify_execute_cb_imm(rq); } @@ -2123,7 +2123,7 @@ static void guc_signal_context_fence(struct intel_context *ce) static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && !submission_disabled(ce_to_guc(ce)); } @@ -2179,11 +2179,11 @@ static int guc_request_alloc(struct i915_request *rq) /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the - * guc_ids and creating horrible race conditions. This is especially bad - * when guc_ids are being stolen due to over subscription. By the time + * guc_id and creating horrible race conditions. This is especially bad + * when guc_id are being stolen due to over subscription. By the time * this function is reached, it is guaranteed that the guc_id will be * persistent until the generated request is retired. Thus, sealing these - * race conditions. It is still safe to fail here if guc_ids are + * race conditions. It is still safe to fail here if guc_id are * exhausted and return -EAGAIN to the user indicating that they can try * again in the future. * @@ -2193,7 +2193,7 @@ static int guc_request_alloc(struct i915_request *rq) * decremented on each retire. When it is zero, a lock around the * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. */ - if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) goto out; ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ @@ -2206,7 +2206,7 @@ static int guc_request_alloc(struct i915_request *rq) disable_submission(guc); goto out; /* GPU will be reset */ } - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); unpin_guc_id(guc, ce); return ret; } @@ -3040,7 +3040,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc, priolist_for_each_request(rq, pl) drm_printf(p, "guc_id=%u, seqno=%llu\n", - rq->context->guc_id, + rq->context->guc_id.id, rq->fence.seqno); } spin_unlock_irqrestore(&sched_engine->lock, flags); @@ -3071,7 +3071,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", ce->ring->head, @@ -3082,7 +3082,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", - atomic_read(&ce->guc_id_ref)); + atomic_read(&ce->guc_id.ref)); drm_printf(p, "\t\tSchedule State: 0x%x\n\n", ce->guc_state.sched_state); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 181c5831405b..75a40a33e13a 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -805,7 +805,7 @@ DECLARE_EVENT_CLASS(i915_request, __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; - __entry->guc_id = rq->context->guc_id; + __entry->guc_id = rq->context->guc_id.id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->tail = rq->tail; @@ -907,7 +907,7 @@ DECLARE_EVENT_CLASS(intel_context, ), TP_fast_assign( - __entry->guc_id = ce->guc_id; + __entry->guc_id = ce->guc_id.id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; __entry->guc_prio = ce->guc_active.prio; -- cgit v1.2.3-70-g09d2 From af5bc9f21e3acb479683e4339c5c3ea27334b270 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:43 -0700 Subject: drm/i915/guc: Drop guc_active move everything into guc_state Now that we have locking hierarchy of sched_engine->lock -> ce->guc_state everything from guc_active can be moved into guc_state and protected the guc_state.lock. Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-23-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 10 ++- drivers/gpu/drm/i915/gt/intel_context_types.h | 7 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 88 +++++++++++------------ drivers/gpu/drm/i915/i915_trace.h | 2 +- 4 files changed, 49 insertions(+), 58 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 485460a11331..ff637147b1a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -394,9 +394,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_state.lock); INIT_LIST_HEAD(&ce->guc_state.fences); - - spin_lock_init(&ce->guc_active.lock); - INIT_LIST_HEAD(&ce->guc_active.requests); + INIT_LIST_HEAD(&ce->guc_state.requests); ce->guc_id.id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id.link); @@ -521,15 +519,15 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce) GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); - spin_lock_irqsave(&ce->guc_active.lock, flags); - list_for_each_entry_reverse(rq, &ce->guc_active.requests, + spin_lock_irqsave(&ce->guc_state.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_state.requests, sched.link) { if (i915_request_completed(rq)) break; active = rq; } - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 0b00d249c884..5285d660eacf 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -172,11 +172,6 @@ struct intel_context { struct i915_sw_fence blocked; /* GuC committed requests */ int number_committed_requests; - } guc_state; - - struct { - /** lock: protects everything in guc_active */ - spinlock_t lock; /** requests: active requests on this context */ struct list_head requests; /* @@ -184,7 +179,7 @@ struct intel_context { */ u8 prio; u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; - } guc_active; + } guc_state; struct { /* GuC LRC descriptor ID */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 2c9a6b4ce071..a9cf49298067 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -846,9 +846,9 @@ __unwind_incomplete_requests(struct intel_context *ce) unsigned long flags; spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); list_for_each_entry_safe_reverse(rq, rn, - &ce->guc_active.requests, + &ce->guc_state.requests, sched.link) { if (i915_request_completed(rq)) continue; @@ -867,7 +867,7 @@ __unwind_incomplete_requests(struct intel_context *ce) list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } @@ -962,10 +962,10 @@ static void guc_cancel_context_requests(struct intel_context *ce) /* Mark all executing requests as skipped. */ spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + spin_lock(&ce->guc_state.lock); + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } @@ -1416,7 +1416,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - desc->priority = ce->guc_active.prio; + desc->priority = ce->guc_state.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); @@ -1811,10 +1811,10 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) static void __guc_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || - ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_HIGH] || - ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || - ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); @@ -1924,17 +1924,17 @@ static void guc_context_set_prio(struct intel_guc *guc, GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); - if (ce->guc_active.prio == prio || submission_disabled(guc) || + if (ce->guc_state.prio == prio || submission_disabled(guc) || !context_registered(ce)) { - ce->guc_active.prio = prio; + ce->guc_state.prio = prio; return; } guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_active.prio = prio; + ce->guc_state.prio = prio; trace_intel_context_set_prio(ce); } @@ -1953,25 +1953,25 @@ static inline u8 map_i915_prio_to_guc_prio(int prio) static inline void add_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); - ++ce->guc_active.prio_count[guc_prio]; + ++ce->guc_state.prio_count[guc_prio]; /* Overflow protection */ - GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); } static inline void sub_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); /* Underflow protection */ - GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); - --ce->guc_active.prio_count[guc_prio]; + --ce->guc_state.prio_count[guc_prio]; } static inline void update_context_prio(struct intel_context *ce) @@ -1982,10 +1982,10 @@ static inline void update_context_prio(struct intel_context *ce) BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); - for (i = 0; i < ARRAY_SIZE(ce->guc_active.prio_count); ++i) { - if (ce->guc_active.prio_count[i]) { + for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { + if (ce->guc_state.prio_count[i]) { guc_context_set_prio(guc, ce, i); break; } @@ -2005,8 +2005,8 @@ static void add_to_context(struct i915_request *rq) GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); - spin_lock(&ce->guc_active.lock); - list_move_tail(&rq->sched.link, &ce->guc_active.requests); + spin_lock(&ce->guc_state.lock); + list_move_tail(&rq->sched.link, &ce->guc_state.requests); if (rq->guc_prio == GUC_PRIO_INIT) { rq->guc_prio = new_guc_prio; @@ -2018,12 +2018,12 @@ static void add_to_context(struct i915_request *rq) } update_context_prio(ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) { - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI) { @@ -2037,7 +2037,7 @@ static void remove_from_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - spin_lock_irq(&ce->guc_active.lock); + spin_lock_irq(&ce->guc_state.lock); list_del_init(&rq->sched.link); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -2047,10 +2047,8 @@ static void remove_from_context(struct i915_request *rq) guc_prio_fini(rq, ce); - spin_unlock_irq(&ce->guc_active.lock); - - spin_lock_irq(&ce->guc_state.lock); decr_context_committed_requests(ce); + spin_unlock_irq(&ce->guc_state.lock); atomic_dec(&ce->guc_id.ref); @@ -2138,7 +2136,7 @@ static void guc_context_init(struct intel_context *ce) prio = ctx->sched.priority; rcu_read_unlock(); - ce->guc_active.prio = map_i915_prio_to_guc_prio(prio); + ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); set_bit(CONTEXT_GUC_INIT, &ce->flags); } @@ -2372,7 +2370,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) return; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_FINI) { if (rq->guc_prio != GUC_PRIO_INIT) sub_context_inflight_prio(ce, rq->guc_prio); @@ -2380,16 +2378,16 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, add_context_inflight_prio(ce, rq->guc_prio); update_context_prio(ce); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_retire_inflight_request_prio(struct i915_request *rq) { struct intel_context *ce = rq->context; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); guc_prio_fini(rq, ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -2955,7 +2953,7 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) goto next; } - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) continue; @@ -3005,10 +3003,10 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, goto next; } - spin_lock(&ce->guc_active.lock); - intel_engine_dump_active_requests(&ce->guc_active.requests, + spin_lock(&ce->guc_state.lock); + intel_engine_dump_active_requests(&ce->guc_state.requests, hung_rq, m); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); next: intel_context_put(ce); @@ -3052,12 +3050,12 @@ static inline void guc_log_context_priority(struct drm_printer *p, { int i; - drm_printf(p, "\t\tPriority: %d\n", ce->guc_active.prio); + drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; i < GUC_CLIENT_PRIORITY_NUM; ++i) { drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", - i, ce->guc_active.prio_count[i]); + i, ce->guc_state.prio_count[i]); } drm_printf(p, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 75a40a33e13a..9795f456cccf 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -910,7 +910,7 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_id = ce->guc_id.id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_prio = ce->guc_active.prio; + __entry->guc_prio = ce->guc_state.prio; ), TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", -- cgit v1.2.3-70-g09d2 From 4f41ddc7c7eeb0a41c3a07da975fd7a0c5715e85 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:44 -0700 Subject: drm/i915/guc: Add GuC kernel doc Add GuC kernel doc for all structures added thus far for GuC submission and update the main GuC submission section with the new interface details. v2: - Drop guc_active.lock DOC v3: - Fixup a few kernel doc comments (Daniele) v4 (Daniele): - Implement doc suggestions from John - Add kerneldoc for all members of the GuC structure and pull the file in i915.rst v5 (Daniele): - Implement new doc suggestions from John Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-24-matthew.brost@intel.com --- Documentation/gpu/i915.rst | 2 + drivers/gpu/drm/i915/gt/intel_context_types.h | 43 ++++++---- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 75 +++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 100 +++++++++++++++++----- drivers/gpu/drm/i915/i915_request.h | 21 +++-- 5 files changed, 181 insertions(+), 60 deletions(-) (limited to 'drivers') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 204ebdaadb45..e9b11044075e 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -498,6 +498,8 @@ GuC .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c :doc: GuC +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.h + GuC Firmware Layout ~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 5285d660eacf..930569a1a01f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -156,40 +156,51 @@ struct intel_context { u8 wa_bb_page; /* if set, page num reserved for context workarounds */ struct { - /** lock: protects everything in guc_state */ + /** @lock: protects everything in guc_state */ spinlock_t lock; /** - * sched_state: scheduling state of this context using GuC + * @sched_state: scheduling state of this context using GuC * submission */ u32 sched_state; /* - * fences: maintains of list of requests that have a submit - * fence related to GuC submission + * @fences: maintains a list of requests that are currently + * being fenced until a GuC operation completes */ struct list_head fences; - /* GuC context blocked fence */ + /** + * @blocked: fence used to signal when the blocking of a + * context's submissions is complete. + */ struct i915_sw_fence blocked; - /* GuC committed requests */ + /** @number_committed_requests: number of committed requests */ int number_committed_requests; - /** requests: active requests on this context */ + /** @requests: list of active requests on this context */ struct list_head requests; - /* - * GuC priority management - */ + /** @prio: the context's current guc priority */ u8 prio; + /** + * @prio_count: a counter of the number requests in flight in + * each priority bucket + */ u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_state; struct { - /* GuC LRC descriptor ID */ + /** + * @id: handle which is used to uniquely identify this context + * with the GuC, protected by guc->contexts_lock + */ u16 id; - - /* GuC LRC descriptor reference count */ + /** + * @ref: the number of references to the guc_id, when + * transitioning in and out of zero protected by + * guc->contexts_lock + */ atomic_t ref; - - /* - * GuC ID link - in list when unpinned but guc_id still valid in GuC + /** + * @link: in guc->guc_id_list when the guc_id has no refs but is + * still valid, protected by guc->contexts_lock */ struct list_head link; } guc_id; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2e27fe59786b..5dd174babf7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -22,74 +22,121 @@ struct __guc_ads_blob; -/* - * Top level structure of GuC. It handles firmware loading and manages client - * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList - * submission. +/** + * struct intel_guc - Top level structure of GuC. + * + * It handles firmware loading and manages client pool. intel_guc owns an + * i915_sched_engine for submission. */ struct intel_guc { + /** @fw: the GuC firmware */ struct intel_uc_fw fw; + /** @log: sub-structure containing GuC log related data and objects */ struct intel_guc_log log; + /** @ct: the command transport communication channel */ struct intel_guc_ct ct; + /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; - /* Global engine used to submit requests to GuC */ + /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; + /** + * @stalled_request: if GuC can't process a request for any reason, we + * save it until GuC restarts processing. No other request can be + * submitted until the stalled request is processed. + */ struct i915_request *stalled_request; /* intel_guc_recv interrupt related state */ + /** @irq_lock: protects GuC irq state */ spinlock_t irq_lock; + /** + * @msg_enabled_mask: mask of events that are processed when receiving + * an INTEL_GUC_ACTION_DEFAULT G2H message. + */ unsigned int msg_enabled_mask; + /** + * @outstanding_submission_g2h: number of outstanding GuC to Host + * responses related to GuC submission, used to determine if the GT is + * idle + */ atomic_t outstanding_submission_g2h; + /** @interrupts: pointers to GuC interrupt-managing functions. */ struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); } interrupts; - /* - * contexts_lock protects the pool of free guc ids and a linked list of - * guc ids available to be stolen + /** + * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and + * ce->guc_id.ref when transitioning in and out of zero */ spinlock_t contexts_lock; + /** @guc_ids: used to allocate unique ce->guc_id.id values */ struct ida guc_ids; + /** + * @guc_id_list: list of intel_context with valid guc_ids but no refs + */ struct list_head guc_id_list; + /** + * @submission_supported: tracks whether we support GuC submission on + * the current platform + */ bool submission_supported; + /** @submission_selected: tracks whether the user enabled GuC submission */ bool submission_selected; + /** + * @rc_supported: tracks whether we support GuC rc on the current platform + */ bool rc_supported; + /** @rc_selected: tracks whether the user enabled GuC rc */ bool rc_selected; + /** @ads_vma: object allocated to hold the GuC ADS */ struct i915_vma *ads_vma; + /** @ads_blob: contents of the GuC ADS */ struct __guc_ads_blob *ads_blob; + /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; + /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ void *lrc_desc_pool_vaddr; - /* guc_id to intel_context lookup */ + /** + * @context_lookup: used to resolve intel_context from guc_id, if a + * context is present in this structure it is registered with the GuC + */ struct xarray context_lookup; - /* Control params for fw initialization */ + /** @params: Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; - /* GuC's FW specific registers used in MMIO send */ + /** @send_regs: GuC's FW specific registers used for sending MMIO H2G */ struct { u32 base; unsigned int count; enum forcewake_domains fw_domains; } send_regs; - /* register used to send interrupts to the GuC FW */ + /** @notify_reg: register used to send interrupts to the GuC FW */ i915_reg_t notify_reg; - /* Store msg (e.g. log flush) that we see while CTBs are disabled */ + /** + * @mmio_msg: notification bitmask that the GuC writes in one of its + * registers when the CT channel is disabled, to be processed when the + * channel is back up. + */ u32 mmio_msg; - /* To serialize the intel_guc_send actions */ + /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a9cf49298067..c7a41802b448 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -28,21 +28,6 @@ /** * DOC: GuC-based command submission * - * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC - * firmware is moving to an updated submission interface and we plan to - * turn submission back on when that lands. The below documentation (and related - * code) matches the old submission model and will be updated as part of the - * upgrade to the new flow. - * - * GuC stage descriptor: - * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. Currently, we only use one - * descriptor. This stage descriptor lets the GuC know about the workqueue and - * process descriptor. Theoretically, it also lets the GuC know about our HW - * contexts (context ID, etc...), but we actually employ a kind of submission - * where the GuC uses the LRCA sent via the work item instead. This is called - * a "proxy" submission. - * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes * a value to the action register (SOFT_SCRATCH_0) along with any data. It then @@ -51,14 +36,85 @@ * processes the request. The kernel driver polls waiting for this update and * then proceeds. * - * Work Items: - * There are several types of work items that the host may place into a - * workqueue, each with its own requirements and limitations. Currently only - * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which - * represents in-order queue. The kernel driver packs ring tail pointer and an - * ELSP context descriptor dword into Work Item. - * See guc_add_request() + * Command Transport buffers (CTBs): + * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host + * - G2H) are a message interface between the i915 and GuC. + * + * Context registration: + * Before a context can be submitted it must be registered with the GuC via a + * H2G. A unique guc_id is associated with each context. The context is either + * registered at request creation time (normal operation) or at submission time + * (abnormal operation, e.g. after a reset). + * + * Context submission: + * The i915 updates the LRC tail value in memory. The i915 must enable the + * scheduling of the context within the GuC for the GuC to actually consider it. + * Therefore, the first time a disabled context is submitted we use a schedule + * enable H2G, while follow up submissions are done via the context submit H2G, + * which informs the GuC that a previously enabled context has new work + * available. + * + * Context unpin: + * To unpin a context a H2G is used to disable scheduling. When the + * corresponding G2H returns indicating the scheduling disable operation has + * completed it is safe to unpin the context. While a disable is in flight it + * isn't safe to resubmit the context so a fence is used to stall all future + * requests of that context until the G2H is returned. + * + * Context deregistration: + * Before a context can be destroyed or if we steal its guc_id we must + * deregister the context with the GuC via H2G. If stealing the guc_id it isn't + * safe to submit anything to this guc_id until the deregister completes so a + * fence is used to stall all requests associated with this guc_id until the + * corresponding G2H returns indicating the guc_id has been deregistered. + * + * guc_ids: + * Unique number associated with private GuC context data passed in during + * context registration / submission / deregistration. 64k available. Simple ida + * is used for allocation. + * + * Stealing guc_ids: + * If no guc_ids are available they can be stolen from another context at + * request creation time if that context is unpinned. If a guc_id can't be found + * we punt this problem to the user as we believe this is near impossible to hit + * during normal use cases. + * + * Locking: + * In the GuC submission code we have 3 basic spin locks which protect + * everything. Details about each below. + * + * sched_engine->lock + * This is the submission lock for all contexts that share an i915 schedule + * engine (sched_engine), thus only one of the contexts which share a + * sched_engine can be submitting at a time. Currently only one sched_engine is + * used for all of GuC submission but that could change in the future. + * + * guc->contexts_lock + * Protects guc_id allocation for the given GuC, i.e. only one context can be + * doing guc_id allocation operations at a time for each GuC in the system. + * + * ce->guc_state.lock + * Protects everything under ce->guc_state. Ensures that a context is in the + * correct state before issuing a H2G. e.g. We don't issue a schedule disable + * on a disabled context (bad idea), we don't issue a schedule enable when a + * schedule disable is in flight, etc... Also protects list of inflight requests + * on the context and the priority management state. Lock is individual to each + * context. + * + * Lock ordering rules: + * sched_engine->lock -> ce->guc_state.lock + * guc->contexts_lock -> ce->guc_state.lock * + * Reset races: + * When a full GT reset is triggered it is assumed that some G2H responses to + * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be + * fatal as we do certain operations upon receiving a G2H (e.g. destroy + * contexts, release guc_ids, etc...). When this occurs we can scrub the + * context state and cleanup appropriately, however this is quite racey. + * To avoid races, the reset code must disable submission before scrubbing for + * the missing G2H, while the submission code must check for submission being + * disabled and skip sending H2Gs and updating context states when it is. Both + * sides must also make sure to hold the relevant locks. */ /* GuC Virtual Engine */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d818cfbfc41d..7bd9ed20623e 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -290,18 +290,23 @@ struct i915_request { struct hrtimer timer; } watchdog; - /* - * Requests may need to be stalled when using GuC submission waiting for - * certain GuC operations to complete. If that is the case, stalled - * requests are added to a per context list of stalled requests. The - * below list_head is the link in that list. + /** + * @guc_fence_link: Requests may need to be stalled when using GuC + * submission waiting for certain GuC operations to complete. If that is + * the case, stalled requests are added to a per context list of stalled + * requests. The below list_head is the link in that list. Protected by + * ce->guc_state.lock. */ struct list_head guc_fence_link; /** - * Priority level while the request is inflight. Differs from i915 - * scheduler priority. See comment above - * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + * @guc_prio: Priority level while the request is in flight. Differs + * from i915 scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. Protected by + * ce->guc_active.lock. Two special values (GUC_PRIO_INIT and + * GUC_PRIO_FINI) outside the GuC priority range are used to indicate + * if the priority has not been initialized yet or if no more updates + * are possible because the request has completed. */ #define GUC_PRIO_INIT 0xff #define GUC_PRIO_FINI 0xfe -- cgit v1.2.3-70-g09d2 From 4796054b381a586f4177a24e3d8b5a6a0a32ce62 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:25 -0700 Subject: drm/i915/selftests: Do not use import_obj uninitialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang warns a couple of times: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:63:6: warning: variable 'import_obj' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (import != &obj->base) { ^~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:80:22: note: uninitialized use occurs here i915_gem_object_put(import_obj); ^~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:63:2: note: remove the 'if' if its condition is always false if (import != &obj->base) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:38:46: note: initialize the variable 'import_obj' to silence this warning struct drm_i915_gem_object *obj, *import_obj; ^ = NULL Shuffle the import_obj initialization above these if statements so that it is not used uninitialized. Fixes: d7b2cb380b3a ("drm/i915/gem: Correct the locking and pin pattern for dma-buf (v8)") Reported-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-2-nathan@kernel.org --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index ffae7df5e4d7..532c7955b300 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -59,13 +59,13 @@ static int igt_dmabuf_import_self(void *arg) err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import != &obj->base) { pr_err("i915_gem_prime_import created a new object!\n"); err = -EINVAL; goto out_import; } - import_obj = to_intel_bo(import); i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); @@ -176,6 +176,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import == &obj->base) { pr_err("i915_gem_prime_import reused gem object!\n"); @@ -183,8 +184,6 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_import; } - import_obj = to_intel_bo(import); - i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); if (err) { -- cgit v1.2.3-70-g09d2 From 46f20a353b80d02492655d99714f0566018a17e8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:26 -0700 Subject: drm/i915/selftests: Always initialize err in igt_dmabuf_import_same_driver_lmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang warns: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:127:13: warning: variable 'err' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] } else if (PTR_ERR(import) != -EOPNOTSUPP) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:138:9: note: uninitialized use occurs here return err; ^~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:127:9: note: remove the 'if' if its condition is always true } else if (PTR_ERR(import) != -EOPNOTSUPP) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:95:9: note: initialize the variable 'err' to silence this warning int err; ^ = 0 The test is expected to pass if i915_gem_prime_import() returns -EOPNOTSUPP so initialize err to zero in this case. Fixes: cdb35d1ed6d2 ("drm/i915/gem: Migrate to system at dma-buf attach time (v7)") Reported-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-3-nathan@kernel.org --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 532c7955b300..4a6bb64c3a35 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -128,6 +128,8 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", PTR_ERR(import)); err = PTR_ERR(import); + } else { + err = 0; } dma_buf_put(dmabuf); -- cgit v1.2.3-70-g09d2 From 43192617f7816bb74584c1df06f57363afd15337 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:27 -0700 Subject: drm/i915: Enable -Wsometimes-uninitialized This warning helps catch uninitialized variables. It should have been enabled at the same time as commit b2423184ac33 ("drm/i915: Enable -Wuninitialized") but I did not realize they were disabled separately. Enable it now that i915 is clean so that it stays that way. Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-4-nathan@kernel.org --- drivers/gpu/drm/i915/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 949719ba048e..c584188aa15a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -19,7 +19,6 @@ subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) # clang warnings subdir-ccflags-y += $(call cc-disable-warning, sign-compare) -subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror -- cgit v1.2.3-70-g09d2 From 50bc6486a8f12643624cd3c48cd67fe49873849a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 3 Sep 2021 17:35:43 -0700 Subject: drm/i915/xehpsdv: Define MOCS table for XeHP SDV Like DG1, XeHP SDV doesn't have LLC/eDRAM control values due to being a dgfx card. XeHP SDV adds 2 more bits: L3_GLBGO to "push the Go point to memory for L3 destined transaction" and L3_LKP to "enable Lookup for uncacheable accesses". Bspec: 45101 Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Clint Taylor Link: https://patchwork.freedesktop.org/patch/msgid/20210904003544.2422282-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index f47521b8e941..8c3fbf4f323b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -42,6 +42,8 @@ struct drm_i915_mocs_table { #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) #define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ @@ -343,6 +345,31 @@ static const struct drm_i915_mocs_entry gen12_mocs_table[] = { L3_3_WB), }; +static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)), + + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)), + /* UC - Non-Coherent; GO:L3 */ + MOCS_ENTRY(4, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)), + + /* HW Reserved - SW program but never use. */ + MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)), + MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)), + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -372,7 +399,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_DG1(i915)) { + if (IS_XEHPSDV(i915)) { + table->size = ARRAY_SIZE(xehpsdv_mocs_table); + table->table = xehpsdv_mocs_table; + table->uc_index = 2; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 5; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->uc_index = 1; -- cgit v1.2.3-70-g09d2 From e935405102783219b883b1e50539908f21463e9a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 3 Sep 2021 17:35:44 -0700 Subject: drm/i915/dg2: Define MOCS table for DG2 Bspec: 45101, 45427 Cc: Ramalingam C Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210904003544.2422282-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 37 +++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 8c3fbf4f323b..e4b97cd14cf9 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -370,6 +370,30 @@ static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry dg2_mocs_table[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -399,7 +423,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_XEHPSDV(i915)) { + if (IS_DG2(i915)) { + if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); + table->table = dg2_mocs_table_g10_ax; + } else { + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; + } + table->uc_index = 1; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 3; + } else if (IS_XEHPSDV(i915)) { table->size = ARRAY_SIZE(xehpsdv_mocs_table); table->table = xehpsdv_mocs_table; table->uc_index = 2; -- cgit v1.2.3-70-g09d2 From ce079f6d87ccd7129675c48067dd49329a123ce6 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 15 Sep 2021 12:59:46 +0200 Subject: drm/i915: Add mmap lock around vma_lookup() in the mman selftest. Add mmap_read_lock/unlock around vma_lookup(). The core code requires this for lookups. Since we only check if the return value is NULL, we can immediately unlock. This fixes the following splat in the selftes: i915: Running i915_gem_mman_live_selftests/igt_mmap ------------[ cut here ]------------ WARNING: CPU: 3 PID: 5654 at include/linux/mmap_lock.h:164 find_vma+0x4e/0xb0 Modules linked in: i915(+) vgem fuse snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec snd_hwdep e1000e snd_hda_core ptp snd_pcm ttm mei_me pps_core i2c_i801 prime_numbers i2c_smbus mei [last unloaded: i915] CPU: 3 PID: 5654 Comm: i915_selftest Tainted: G U 5.15.0-rc1-CI-Trybot_7984+ #1 Hardware name: Micro-Star International Co., Ltd. MS-7B54/Z370M MORTAR (MS-7B54), BIOS 1.00 10/31/2017 RIP: 0010:find_vma+0x4e/0xb0 Code: de 48 89 ef e8 d3 94 fe ff 48 85 c0 74 34 48 83 c4 08 5b 5d c3 48 8d bf 28 01 00 00 be ff ff ff ff e8 d6 46 8b 00 85 c0 75 c8 <0f> 0b 48 8b 85 b8 00 00 00 48 85 c0 75 c6 48 89 ef e8 12 26 87 00 RSP: 0018:ffffc900013df980 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00007f9df2b80000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff822e314c RDI: ffffffff8233c83f RBP: ffff88811bafc840 R08: ffff888107d0ddb8 R09: 00000000fffffffe R10: 0000000000000001 R11: 00000000ffbae7ba R12: 0000000000000000 R13: 0000000000000000 R14: ffff88812a710000 R15: ffff888114fa42c0 FS: 00007f9def9d4c00(0000) GS:ffff888266580000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f799627fe50 CR3: 000000011bbc2006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __igt_mmap+0xe0/0x490 [i915] igt_mmap+0xd2/0x160 [i915] ? __trace_bprintk+0x6e/0x80 __i915_subtests.cold.7+0x42/0x92 [i915] ? i915_perf_selftests+0x20/0x20 [i915] ? __i915_nop_setup+0x10/0x10 [i915] __run_selftests.part.3+0x10d/0x172 [i915] i915_live_selftests.cold.5+0x1f/0x47 [i915] i915_pci_probe+0x93/0x1d0 [i915] Signed-off-by: Maarten Lankhorst Closes: https://gitlab.freedesktop.org/drm/intel/issues/4129 Link: https://patchwork.freedesktop.org/patch/msgid/20210915105946.394412-1-maarten.lankhorst@linux.intel.com Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index a2c34e5a1c54..6d30cdfa80f3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -903,7 +903,9 @@ static int __igt_mmap(struct drm_i915_private *i915, pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr); + mmap_read_lock(current->mm); area = vma_lookup(current->mm, addr); + mmap_read_unlock(current->mm); if (!area) { pr_err("%s: Did not create a vm_area_struct for the mmap\n", obj->mm.region->name); -- cgit v1.2.3-70-g09d2 From dc34ca9231f2631e635a4737242bc0f7fe5c4a45 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 3 Sep 2021 16:28:37 +0200 Subject: drm/i915: Mark GPU wedging on driver unregister unrecoverable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPU wedged flag now set on driver unregister to prevent from further using the GPU can be then cleared unintentionally when calling __intel_gt_unset_wedged() still before the flag is finally marked unrecoverable. We need to have it marked unrecoverable earlier. Implement that by replacing a call to intel_gt_set_wedged() in intel_gt_driver_unregister() with intel_gt_set_wedged_on_fini(). With the above in place, intel_gt_set_wedged_on_fini() is now called twice on driver remove, second time from __intel_gt_disable(). This seems harmless, while dropping intel_gt_set_wedged_on_fini() from __intel_gt_disable() proved to break some driver probe error unwind paths as well as mock selftest exit path. Signed-off-by: Janusz Krzysztofik Cc: Michał Winiarski Reviewed-by: Michał Winiarski Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210903142837.216978-1-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2aeaae036a6f..55e87aff51d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -752,7 +752,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt) * all in-flight requests so that we can quickly unbind the active * resources. */ - intel_gt_set_wedged(gt); + intel_gt_set_wedged_on_fini(gt); /* Scrub all HW state upon release */ with_intel_runtime_pm(gt->uncore->rpm, wakeref) -- cgit v1.2.3-70-g09d2 From 48b0961269546716c3232748bf37e64e49fb866c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 30 Aug 2021 14:09:48 +0200 Subject: drm/i915: Move __i915_gem_free_object to ttm_bo_destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we implement delayed destroy, we may have a second call to the delete_mem_notify() handler, while free_object() only should be called once. Move it to bo->destroy(), to ensure it's only called once. This fixes some weird memory corruption issues with delayed destroy when async eviction is used. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210830121006.2978297-2-maarten.lankhorst@linux.intel.com Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Cc: Thomas Hellström Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 3225fda1f0f6..1e9fb126cb7b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -356,11 +356,8 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - if (likely(obj)) { - /* This releases all gem object bindings to the backend. */ + if (likely(obj)) i915_ttm_free_cached_io_st(obj); - __i915_gem_free_object(obj); - } } static struct intel_memory_region * @@ -886,8 +883,12 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj); + i915_gem_object_release_memory_region(obj); mutex_destroy(&obj->ttm.get_io_page.lock); + if (obj->ttm.created) call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } -- cgit v1.2.3-70-g09d2 From 12235da8c80a1f9909008e4ca6036d5772b81192 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Sep 2021 11:32:18 +0200 Subject: kernel/locking: Add context to ww_mutex_trylock() i915 will soon gain an eviction path that trylock a whole lot of locks for eviction, getting dmesg failures like below: BUG: MAX_LOCK_DEPTH too low! turning off the locking correctness validator. depth: 48 max: 48! 48 locks held by i915_selftest/5776: #0: ffff888101a79240 (&dev->mutex){....}-{3:3}, at: __driver_attach+0x88/0x160 #1: ffffc900009778c0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_vma_pin.constprop.63+0x39/0x1b0 [i915] #2: ffff88800cf74de8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_vma_pin.constprop.63+0x5f/0x1b0 [i915] #3: ffff88810c7f9e38 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x1c4/0x9d0 [i915] #4: ffff88810bad5768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915] #5: ffff88810bad60e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915] ... #46: ffff88811964d768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915] #47: ffff88811964e0e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915] INFO: lockdep is turned off. Fixing eviction to nest into ww_class_acquire is a high priority, but it requires a rework of the entire driver, which can only be done one step at a time. As an intermediate solution, add an acquire context to ww_mutex_trylock, which allows us to do proper nesting annotations on the trylocks, making the above lockdep splat disappear. This is also useful in regulator_lock_nested, which may avoid dropping regulator_nesting_mutex in the uncontended path, so use it there. TTM may be another user for this, where we could lock a buffer in a fastpath with list locks held, without dropping all locks we hold. [peterz: rework actual ww_mutex_trylock() implementations] Signed-off-by: Maarten Lankhorst Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YUBGPdDDjKlxAuXJ@hirez.programming.kicks-ass.net --- drivers/gpu/drm/drm_modeset_lock.c | 2 +- drivers/regulator/core.c | 2 +- include/linux/dma-resv.h | 2 +- include/linux/ww_mutex.h | 15 +------ kernel/locking/mutex.c | 41 ++++++++++++++++++ kernel/locking/test-ww_mutex.c | 86 ++++++++++++++++++++++++++++---------- kernel/locking/ww_rt_mutex.c | 25 +++++++++++ lib/locking-selftest.c | 2 +- 8 files changed, 137 insertions(+), 38 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index fcfe1a03c4a1..bf8a6e823a15 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -248,7 +248,7 @@ static inline int modeset_lock(struct drm_modeset_lock *lock, if (ctx->trylock_only) { lockdep_assert_held(&ctx->ww_ctx); - if (!ww_mutex_trylock(&lock->mutex)) + if (!ww_mutex_trylock(&lock->mutex, NULL)) return -EBUSY; else return 0; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index ca6caba8a191..f4d441b1a8bf 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -145,7 +145,7 @@ static inline int regulator_lock_nested(struct regulator_dev *rdev, mutex_lock(®ulator_nesting_mutex); - if (ww_ctx || !ww_mutex_trylock(&rdev->mutex)) { + if (!ww_mutex_trylock(&rdev->mutex, ww_ctx)) { if (rdev->mutex_owner == current) rdev->ref_cnt++; else diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index e1ca2080a1ff..39fefb86780b 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -173,7 +173,7 @@ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj, */ static inline bool __must_check dma_resv_trylock(struct dma_resv *obj) { - return ww_mutex_trylock(&obj->lock); + return ww_mutex_trylock(&obj->lock, NULL); } /** diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 29db736af86d..bb763085479a 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -28,12 +28,10 @@ #ifndef CONFIG_PREEMPT_RT #define WW_MUTEX_BASE mutex #define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) -#define ww_mutex_base_trylock(l) mutex_trylock(l) #define ww_mutex_base_is_locked(b) mutex_is_locked((b)) #else #define WW_MUTEX_BASE rt_mutex #define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k) -#define ww_mutex_base_trylock(l) rt_mutex_trylock(l) #define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex) #endif @@ -339,17 +337,8 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, extern void ww_mutex_unlock(struct ww_mutex *lock); -/** - * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context - * @lock: mutex to lock - * - * Trylocks a mutex without acquire context, so no deadlock detection is - * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. - */ -static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) -{ - return ww_mutex_base_trylock(&lock->base); -} +extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /*** * ww_mutex_destroy - mark a w/w mutex unusable diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d456579d0952..2fede72b6af5 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -94,6 +94,9 @@ static inline unsigned long __owner_flags(unsigned long owner) return owner & MUTEX_FLAGS; } +/* + * Returns: __mutex_owner(lock) on failure or NULL on success. + */ static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, bool handoff) { unsigned long owner, curr = (unsigned long)current; @@ -736,6 +739,44 @@ __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true); } +/** + * ww_mutex_trylock - tries to acquire the w/w mutex with optional acquire context + * @ww: mutex to lock + * @ww_ctx: optional w/w acquire context + * + * Trylocks a mutex with the optional acquire context; no deadlock detection is + * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. + * + * Unlike ww_mutex_lock, no deadlock handling is performed. However, if a @ctx is + * specified, -EALREADY handling may happen in calls to ww_mutex_trylock. + * + * A mutex acquired with this function must be released with ww_mutex_unlock. + */ +int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) +{ + if (!ww_ctx) + return mutex_trylock(&ww->base); + + MUTEX_WARN_ON(ww->base.magic != &ww->base); + + /* + * Reset the wounded flag after a kill. No other process can + * race and wound us here, since they can't have a valid owner + * pointer if we don't have any locks held. + */ + if (ww_ctx->acquired == 0) + ww_ctx->wounded = 0; + + if (__mutex_trylock(&ww->base)) { + ww_mutex_set_context_fastpath(ww, ww_ctx); + mutex_acquire_nest(&ww->base.dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_); + return 1; + } + + return 0; +} +EXPORT_SYMBOL(ww_mutex_trylock); + #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3e82f449b4ff..d63ac411f367 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -16,6 +16,15 @@ static DEFINE_WD_CLASS(ww_class); struct workqueue_struct *wq; +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH +#define ww_acquire_init_noinject(a, b) do { \ + ww_acquire_init((a), (b)); \ + (a)->deadlock_inject_countdown = ~0U; \ + } while (0) +#else +#define ww_acquire_init_noinject(a, b) ww_acquire_init((a), (b)) +#endif + struct test_mutex { struct work_struct work; struct ww_mutex mutex; @@ -36,7 +45,7 @@ static void test_mutex_work(struct work_struct *work) wait_for_completion(&mtx->go); if (mtx->flags & TEST_MTX_TRY) { - while (!ww_mutex_trylock(&mtx->mutex)) + while (!ww_mutex_trylock(&mtx->mutex, NULL)) cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); @@ -109,19 +118,38 @@ static int test_mutex(void) return 0; } -static int test_aa(void) +static int test_aa(bool trylock) { struct ww_mutex mutex; struct ww_acquire_ctx ctx; int ret; + const char *from = trylock ? "trylock" : "lock"; ww_mutex_init(&mutex, &ww_class); ww_acquire_init(&ctx, &ww_class); - ww_mutex_lock(&mutex, &ctx); + if (!trylock) { + ret = ww_mutex_lock(&mutex, &ctx); + if (ret) { + pr_err("%s: initial lock failed!\n", __func__); + goto out; + } + } else { + if (!ww_mutex_trylock(&mutex, &ctx)) { + pr_err("%s: initial trylock failed!\n", __func__); + goto out; + } + } - if (ww_mutex_trylock(&mutex)) { - pr_err("%s: trylocked itself!\n", __func__); + if (ww_mutex_trylock(&mutex, NULL)) { + pr_err("%s: trylocked itself without context from %s!\n", __func__, from); + ww_mutex_unlock(&mutex); + ret = -EINVAL; + goto out; + } + + if (ww_mutex_trylock(&mutex, &ctx)) { + pr_err("%s: trylocked itself with context from %s!\n", __func__, from); ww_mutex_unlock(&mutex); ret = -EINVAL; goto out; @@ -129,17 +157,17 @@ static int test_aa(void) ret = ww_mutex_lock(&mutex, &ctx); if (ret != -EALREADY) { - pr_err("%s: missed deadlock for recursing, ret=%d\n", - __func__, ret); + pr_err("%s: missed deadlock for recursing, ret=%d from %s\n", + __func__, ret, from); if (!ret) ww_mutex_unlock(&mutex); ret = -EINVAL; goto out; } + ww_mutex_unlock(&mutex); ret = 0; out: - ww_mutex_unlock(&mutex); ww_acquire_fini(&ctx); return ret; } @@ -150,7 +178,7 @@ struct test_abba { struct ww_mutex b_mutex; struct completion a_ready; struct completion b_ready; - bool resolve; + bool resolve, trylock; int result; }; @@ -160,8 +188,13 @@ static void test_abba_work(struct work_struct *work) struct ww_acquire_ctx ctx; int err; - ww_acquire_init(&ctx, &ww_class); - ww_mutex_lock(&abba->b_mutex, &ctx); + ww_acquire_init_noinject(&ctx, &ww_class); + if (!abba->trylock) + ww_mutex_lock(&abba->b_mutex, &ctx); + else + WARN_ON(!ww_mutex_trylock(&abba->b_mutex, &ctx)); + + WARN_ON(READ_ONCE(abba->b_mutex.ctx) != &ctx); complete(&abba->b_ready); wait_for_completion(&abba->a_ready); @@ -181,7 +214,7 @@ static void test_abba_work(struct work_struct *work) abba->result = err; } -static int test_abba(bool resolve) +static int test_abba(bool trylock, bool resolve) { struct test_abba abba; struct ww_acquire_ctx ctx; @@ -192,12 +225,18 @@ static int test_abba(bool resolve) INIT_WORK_ONSTACK(&abba.work, test_abba_work); init_completion(&abba.a_ready); init_completion(&abba.b_ready); + abba.trylock = trylock; abba.resolve = resolve; schedule_work(&abba.work); - ww_acquire_init(&ctx, &ww_class); - ww_mutex_lock(&abba.a_mutex, &ctx); + ww_acquire_init_noinject(&ctx, &ww_class); + if (!trylock) + ww_mutex_lock(&abba.a_mutex, &ctx); + else + WARN_ON(!ww_mutex_trylock(&abba.a_mutex, &ctx)); + + WARN_ON(READ_ONCE(abba.a_mutex.ctx) != &ctx); complete(&abba.a_ready); wait_for_completion(&abba.b_ready); @@ -249,7 +288,7 @@ static void test_cycle_work(struct work_struct *work) struct ww_acquire_ctx ctx; int err, erra = 0; - ww_acquire_init(&ctx, &ww_class); + ww_acquire_init_noinject(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); complete(cycle->a_signal); @@ -581,7 +620,9 @@ static int stress(int nlocks, int nthreads, unsigned int flags) static int __init test_ww_mutex_init(void) { int ncpus = num_online_cpus(); - int ret; + int ret, i; + + printk(KERN_INFO "Beginning ww mutex selftests\n"); wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0); if (!wq) @@ -591,17 +632,19 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = test_aa(); + ret = test_aa(false); if (ret) return ret; - ret = test_abba(false); + ret = test_aa(true); if (ret) return ret; - ret = test_abba(true); - if (ret) - return ret; + for (i = 0; i < 4; i++) { + ret = test_abba(i & 1, i & 2); + if (ret) + return ret; + } ret = test_cycle(ncpus); if (ret) @@ -619,6 +662,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; + printk(KERN_INFO "All ww mutex selftests passed\n"); return 0; } diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c index 3f1fff7d2780..0e00205cf467 100644 --- a/kernel/locking/ww_rt_mutex.c +++ b/kernel/locking/ww_rt_mutex.c @@ -9,6 +9,31 @@ #define WW_RT #include "rtmutex.c" +int ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) +{ + struct rt_mutex *rtm = &lock->base; + + if (!ww_ctx) + return rt_mutex_trylock(rtm); + + /* + * Reset the wounded flag after a kill. No other process can + * race and wound us here, since they can't have a valid owner + * pointer if we don't have any locks held. + */ + if (ww_ctx->acquired == 0) + ww_ctx->wounded = 0; + + if (__rt_mutex_trylock(&rtm->rtmutex)) { + ww_mutex_set_context_fastpath(lock, ww_ctx); + mutex_acquire_nest(&rtm->dep_map, 0, 1, ww_ctx->dep_map, _RET_IP_); + return 1; + } + + return 0; +} +EXPORT_SYMBOL(ww_mutex_trylock); + static int __sched __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state, unsigned long ip) diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 161108e5d2fe..71652e1c397c 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -258,7 +258,7 @@ static void init_shared_classes(void) #define WWAF(x) ww_acquire_fini(x) #define WWL(x, c) ww_mutex_lock(x, c) -#define WWT(x) ww_mutex_trylock(x) +#define WWT(x) ww_mutex_trylock(x, NULL) #define WWL1(x) ww_mutex_lock(x, NULL) #define WWU(x) ww_mutex_unlock(x) -- cgit v1.2.3-70-g09d2 From 022f324c9934cc9e603923121f108eb8623a986c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Sep 2021 19:57:51 -0700 Subject: drm/i915: rename debugfs_gt files We shouldn't be using debugfs_ namespace for this functionality. Rename debugfs_gt.[ch] to intel_gt_debugfs.[ch] and then make functions, defines and structs follow suit. While at it and since we are renaming the header, sort the includes alphabetically. Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210918025754.1254705-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gt/debugfs_engines.c | 6 +-- drivers/gpu/drm/i915/gt/debugfs_gt.c | 47 ---------------------- drivers/gpu/drm/i915/gt/debugfs_gt.h | 38 ----------------- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 14 +++---- drivers/gpu/drm/i915/gt/intel_gt.c | 6 +-- drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 47 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_debugfs.h | 38 +++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 10 ++--- drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 18 ++++----- drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c | 8 ++-- drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c | 6 +-- drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c | 6 +-- 13 files changed, 123 insertions(+), 123 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_gt.c delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_gt.h create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_debugfs.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 335ba9f43d8f..7eab4edae2b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -79,7 +79,6 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # "Graphics Technology" (aka we talk to the gpu) gt-y += \ gt/debugfs_engines.o \ - gt/debugfs_gt.o \ gt/debugfs_gt_pm.o \ gt/gen2_engine_cs.o \ gt/gen6_engine_cs.o \ @@ -100,6 +99,7 @@ gt-y += \ gt/intel_gt.o \ gt/intel_gt_buffer_pool.o \ gt/intel_gt_clock_utils.o \ + gt/intel_gt_debugfs.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c index 5e3725e62241..2980dac5b171 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_engines.c +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c @@ -7,9 +7,9 @@ #include #include "debugfs_engines.h" -#include "debugfs_gt.h" #include "i915_drv.h" /* for_each_engine! */ #include "intel_engine.h" +#include "intel_gt_debugfs.h" static int engines_show(struct seq_file *m, void *data) { @@ -24,11 +24,11 @@ static int engines_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(engines); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines); void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "engines", &engines_fops }, }; diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c deleted file mode 100644 index 591eb60785db..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.c +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include - -#include "debugfs_engines.h" -#include "debugfs_gt.h" -#include "debugfs_gt_pm.h" -#include "intel_sseu_debugfs.h" -#include "uc/intel_uc_debugfs.h" -#include "i915_drv.h" - -void debugfs_gt_register(struct intel_gt *gt) -{ - struct dentry *root; - - if (!gt->i915->drm.primary->debugfs_root) - return; - - root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); - if (IS_ERR(root)) - return; - - debugfs_engines_register(gt, root); - debugfs_gt_pm_register(gt, root); - intel_sseu_debugfs_register(gt, root); - - intel_uc_debugfs_register(>->uc, root); -} - -void intel_gt_debugfs_register_files(struct dentry *root, - const struct debugfs_gt_file *files, - unsigned long count, void *data) -{ - while (count--) { - umode_t mode = files->fops->write ? 0644 : 0444; - - if (!files->eval || files->eval(data)) - debugfs_create_file(files->name, - mode, root, data, - files->fops); - - files++; - } -} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h deleted file mode 100644 index f77540f727e9..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef DEBUGFS_GT_H -#define DEBUGFS_GT_H - -#include - -struct intel_gt; - -#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \ - static int __name ## _open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, __name ## _show, inode->i_private); \ -} \ -static const struct file_operations __name ## _fops = { \ - .owner = THIS_MODULE, \ - .open = __name ## _open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -} - -void debugfs_gt_register(struct intel_gt *gt); - -struct debugfs_gt_file { - const char *name; - const struct file_operations *fops; - bool (*eval)(void *data); -}; - -void intel_gt_debugfs_register_files(struct dentry *root, - const struct debugfs_gt_file *files, - unsigned long count, void *data); - -#endif /* DEBUGFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index f6733f279890..9222cf68c56c 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -6,11 +6,11 @@ #include -#include "debugfs_gt.h" #include "debugfs_gt_pm.h" #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" +#include "intel_gt_debugfs.h" #include "intel_gt_pm.h" #include "intel_llc.h" #include "intel_rc6.h" @@ -36,7 +36,7 @@ static int fw_domains_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); static void print_rc6_res(struct seq_file *m, const char *title, @@ -238,7 +238,7 @@ static int drpc_show(struct seq_file *m, void *unused) return err; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc); static int frequency_show(struct seq_file *m, void *unused) { @@ -480,7 +480,7 @@ static int frequency_show(struct seq_file *m, void *unused) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency); static int llc_show(struct seq_file *m, void *data) { @@ -533,7 +533,7 @@ static bool llc_eval(void *data) return HAS_LLC(gt->i915); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(llc); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc); static const char *rps_power_to_str(unsigned int power) { @@ -612,11 +612,11 @@ static bool rps_eval(void *data) return HAS_RPS(gt->i915); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "drpc", &drpc_fops, NULL }, { "frequency", &frequency_fops, NULL }, { "forcewake", &fw_domains_fops, NULL }, diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 55e87aff51d2..985922c03297 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,7 +3,7 @@ * Copyright © 2019 Intel Corporation */ -#include "debugfs_gt.h" +#include "intel_gt_debugfs.h" #include "gem/i915_gem_lmem.h" #include "i915_drv.h" @@ -15,11 +15,11 @@ #include "intel_gt_requests.h" #include "intel_migrate.h" #include "intel_mocs.h" +#include "intel_pm.h" #include "intel_rc6.h" #include "intel_renderstate.h" #include "intel_rps.h" #include "intel_uncore.h" -#include "intel_pm.h" #include "shmem_utils.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) @@ -434,7 +434,7 @@ void intel_gt_driver_register(struct intel_gt *gt) { intel_rps_driver_register(>->rps); - debugfs_gt_register(gt); + intel_gt_debugfs_register(gt); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c new file mode 100644 index 000000000000..b19648008265 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "debugfs_engines.h" +#include "debugfs_gt_pm.h" +#include "i915_drv.h" +#include "intel_gt_debugfs.h" +#include "intel_sseu_debugfs.h" +#include "uc/intel_uc_debugfs.h" + +void intel_gt_debugfs_register(struct intel_gt *gt) +{ + struct dentry *root; + + if (!gt->i915->drm.primary->debugfs_root) + return; + + root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); + if (IS_ERR(root)) + return; + + debugfs_engines_register(gt, root); + debugfs_gt_pm_register(gt, root); + intel_sseu_debugfs_register(gt, root); + + intel_uc_debugfs_register(>->uc, root); +} + +void intel_gt_debugfs_register_files(struct dentry *root, + const struct intel_gt_debugfs_file *files, + unsigned long count, void *data) +{ + while (count--) { + umode_t mode = files->fops->write ? 0644 : 0444; + + if (!files->eval || files->eval(data)) + debugfs_create_file(files->name, + mode, root, data, + files->fops); + + files++; + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h new file mode 100644 index 000000000000..8b6fca09897c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_DEBUGFS_H +#define INTEL_GT_DEBUGFS_H + +#include + +struct intel_gt; + +#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \ + static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +void intel_gt_debugfs_register(struct intel_gt *gt); + +struct intel_gt_debugfs_file { + const char *name; + const struct file_operations *fops; + bool (*eval)(void *data); +}; + +void intel_gt_debugfs_register_files(struct dentry *root, + const struct intel_gt_debugfs_file *files, + unsigned long count, void *data); + +#endif /* INTEL_GT_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 1ba8b7da9d37..8bb3a91dad82 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -4,9 +4,9 @@ * Copyright © 2020 Intel Corporation */ -#include "debugfs_gt.h" -#include "intel_sseu_debugfs.h" #include "i915_drv.h" +#include "intel_gt_debugfs.h" +#include "intel_sseu_debugfs.h" static void sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice, u8 *to_mask) @@ -282,7 +282,7 @@ static int sseu_status_show(struct seq_file *m, void *unused) return intel_sseu_status(m, gt); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(sseu_status); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status); static int rcs_topology_show(struct seq_file *m, void *unused) { @@ -293,11 +293,11 @@ static int rcs_topology_show(struct seq_file *m, void *unused) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(rcs_topology); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology); void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "sseu_status", &sseu_status_fops, NULL }, { "rcs_topology", &rcs_topology_fops, NULL }, }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 887c8c8f35db..25f09a420561 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -5,14 +5,14 @@ #include -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" +#include "gt/uc/intel_guc_ads.h" +#include "gt/uc/intel_guc_ct.h" +#include "gt/uc/intel_guc_slpc.h" +#include "gt/uc/intel_guc_submission.h" #include "intel_guc.h" #include "intel_guc_debugfs.h" #include "intel_guc_log_debugfs.h" -#include "gt/uc/intel_guc_ct.h" -#include "gt/uc/intel_guc_ads.h" -#include "gt/uc/intel_guc_submission.h" -#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -35,7 +35,7 @@ static int guc_info_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_info); static int guc_registered_contexts_show(struct seq_file *m, void *data) { @@ -49,7 +49,7 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); static int guc_slpc_info_show(struct seq_file *m, void *unused) { @@ -62,7 +62,7 @@ static int guc_slpc_info_show(struct seq_file *m, void *unused) return intel_guc_slpc_print_info(slpc, &p); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); static bool intel_eval_slpc_support(void *data) { @@ -73,7 +73,7 @@ static bool intel_eval_slpc_support(void *data) void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c index 64e0b86bf258..46026c2c1722 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c @@ -6,7 +6,7 @@ #include #include -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" #include "intel_guc.h" #include "intel_guc_log.h" #include "intel_guc_log_debugfs.h" @@ -17,7 +17,7 @@ static int guc_log_dump_show(struct seq_file *m, void *data) return intel_guc_log_dump(m->private, &p, false); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); static int guc_load_err_log_dump_show(struct seq_file *m, void *data) { @@ -25,7 +25,7 @@ static int guc_load_err_log_dump_show(struct seq_file *m, void *data) return intel_guc_log_dump(m->private, &p, true); } -DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); static int guc_log_level_get(void *data, u64 *val) { @@ -109,7 +109,7 @@ static const struct file_operations guc_log_relay_fops = { void intel_guc_log_debugfs_register(struct intel_guc_log *log, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "guc_log_dump", &guc_log_dump_fops, NULL }, { "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL }, { "guc_log_level", &guc_log_level_fops, NULL }, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c index 5733c15fd123..15998963b863 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c @@ -5,7 +5,7 @@ #include -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" #include "intel_huc.h" #include "intel_huc_debugfs.h" @@ -21,11 +21,11 @@ static int huc_info_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(huc_info); void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "huc_info", &huc_info_fops, NULL }, }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c index 089d98662f46..c2f7924295e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -6,7 +6,7 @@ #include #include -#include "gt/debugfs_gt.h" +#include "gt/intel_gt_debugfs.h" #include "intel_guc_debugfs.h" #include "intel_huc_debugfs.h" #include "intel_uc.h" @@ -32,11 +32,11 @@ static int uc_usage_show(struct seq_file *m, void *data) return 0; } -DEFINE_GT_DEBUGFS_ATTRIBUTE(uc_usage); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(uc_usage); void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) { - static const struct debugfs_gt_file files[] = { + static const struct intel_gt_debugfs_file files[] = { { "usage", &uc_usage_fops, NULL }, }; struct dentry *root; -- cgit v1.2.3-70-g09d2 From 00142bce94dc31a5d8ecfd0238114354dd07ac6e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Sep 2021 19:57:52 -0700 Subject: drm/i915: rename debugfs_engines files We shouldn't be using debugfs_ namespace for this functionality. Rename debugfs_engines.[ch] to intel_gt_engines_debugfs.[ch] and then make functions, defines and structs follow suit. Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210918025754.1254705-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gt/debugfs_engines.c | 36 ---------------------- drivers/gpu/drm/i915/gt/debugfs_engines.h | 14 --------- drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 4 +-- drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c | 36 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h | 14 +++++++++ 6 files changed, 53 insertions(+), 53 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_engines.c delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_engines.h create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 7eab4edae2b6..3c68909d9066 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -78,7 +78,6 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # "Graphics Technology" (aka we talk to the gpu) gt-y += \ - gt/debugfs_engines.o \ gt/debugfs_gt_pm.o \ gt/gen2_engine_cs.o \ gt/gen6_engine_cs.o \ @@ -100,6 +99,7 @@ gt-y += \ gt/intel_gt_buffer_pool.o \ gt/intel_gt_clock_utils.o \ gt/intel_gt_debugfs.o \ + gt/intel_gt_engines_debugfs.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c deleted file mode 100644 index 2980dac5b171..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_engines.c +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: MIT - -/* - * Copyright © 2019 Intel Corporation - */ - -#include - -#include "debugfs_engines.h" -#include "i915_drv.h" /* for_each_engine! */ -#include "intel_engine.h" -#include "intel_gt_debugfs.h" - -static int engines_show(struct seq_file *m, void *data) -{ - struct intel_gt *gt = m->private; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct drm_printer p; - - p = drm_seq_file_printer(m); - for_each_engine(engine, gt, id) - intel_engine_dump(engine, &p, "%s\n", engine->name); - - return 0; -} -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines); - -void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) -{ - static const struct intel_gt_debugfs_file files[] = { - { "engines", &engines_fops }, - }; - - intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); -} diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h deleted file mode 100644 index f69257eaa1cc..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_engines.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef DEBUGFS_ENGINES_H -#define DEBUGFS_ENGINES_H - -struct intel_gt; -struct dentry; - -void debugfs_engines_register(struct intel_gt *gt, struct dentry *root); - -#endif /* DEBUGFS_ENGINES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index b19648008265..3c77c2965e19 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -5,10 +5,10 @@ #include -#include "debugfs_engines.h" #include "debugfs_gt_pm.h" #include "i915_drv.h" #include "intel_gt_debugfs.h" +#include "intel_gt_engines_debugfs.h" #include "intel_sseu_debugfs.h" #include "uc/intel_uc_debugfs.h" @@ -23,7 +23,7 @@ void intel_gt_debugfs_register(struct intel_gt *gt) if (IS_ERR(root)) return; - debugfs_engines_register(gt, root); + intel_gt_engines_debugfs_register(gt, root); debugfs_gt_pm_register(gt, root); intel_sseu_debugfs_register(gt, root); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c new file mode 100644 index 000000000000..8f9b874fdc9c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "i915_drv.h" /* for_each_engine! */ +#include "intel_engine.h" +#include "intel_gt_debugfs.h" +#include "intel_gt_engines_debugfs.h" + +static int engines_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct drm_printer p; + + p = drm_seq_file_printer(m); + for_each_engine(engine, gt, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + + return 0; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines); + +void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct intel_gt_debugfs_file files[] = { + { "engines", &engines_fops }, + }; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h new file mode 100644 index 000000000000..dda113452da9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_ENGINES_DEBUGFS_H +#define INTEL_GT_ENGINES_DEBUGFS_H + +struct intel_gt; +struct dentry; + +void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root); + +#endif /* INTEL_GT_ENGINES_DEBUGFS_H */ -- cgit v1.2.3-70-g09d2 From 23f6a829a67cd40ecd1b90dede33b8395f105f81 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Sep 2021 19:57:53 -0700 Subject: drm/i915: rename debugfs_gt_pm files We shouldn't be using debugfs_ namespace for this functionality. Rename debugfs_gt_pm.[ch] to intel_gt_pm_debugfs.[ch] and then make functions, defines and structs follow suit. Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210918025754.1254705-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 628 -------------------------- drivers/gpu/drm/i915/gt/debugfs_gt_pm.h | 14 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 4 +- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 628 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h | 14 + 6 files changed, 645 insertions(+), 645 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_gt_pm.c delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_gt_pm.h create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3c68909d9066..0823ea80fdc6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -78,7 +78,6 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # "Graphics Technology" (aka we talk to the gpu) gt-y += \ - gt/debugfs_gt_pm.o \ gt/gen2_engine_cs.o \ gt/gen6_engine_cs.o \ gt/gen6_ppgtt.o \ @@ -102,6 +101,7 @@ gt-y += \ gt/intel_gt_engines_debugfs.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ + gt/intel_gt_pm_debugfs.o \ gt/intel_gt_pm_irq.o \ gt/intel_gt_requests.o \ gt/intel_gtt.o \ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c deleted file mode 100644 index 9222cf68c56c..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ /dev/null @@ -1,628 +0,0 @@ -// SPDX-License-Identifier: MIT - -/* - * Copyright © 2019 Intel Corporation - */ - -#include - -#include "debugfs_gt_pm.h" -#include "i915_drv.h" -#include "intel_gt.h" -#include "intel_gt_clock_utils.h" -#include "intel_gt_debugfs.h" -#include "intel_gt_pm.h" -#include "intel_llc.h" -#include "intel_rc6.h" -#include "intel_rps.h" -#include "intel_runtime_pm.h" -#include "intel_sideband.h" -#include "intel_uncore.h" - -static int fw_domains_show(struct seq_file *m, void *data) -{ - struct intel_gt *gt = m->private; - struct intel_uncore *uncore = gt->uncore; - struct intel_uncore_forcewake_domain *fw_domain; - unsigned int tmp; - - seq_printf(m, "user.bypass_count = %u\n", - uncore->user_forcewake_count); - - for_each_fw_domain(fw_domain, uncore, tmp) - seq_printf(m, "%s.wake_count = %u\n", - intel_uncore_forcewake_domain_to_str(fw_domain->id), - READ_ONCE(fw_domain->wake_count)); - - return 0; -} -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); - -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>->rc6, reg)); -} - -static int vlv_drpc(struct seq_file *m) -{ - struct intel_gt *gt = m->private; - struct intel_uncore *uncore = gt->uncore; - u32 rcctl1, pw_status; - - pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); - rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); - - seq_printf(m, "RC6 Enabled: %s\n", - yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_EI_MODE(1)))); - seq_printf(m, "Render Power Well: %s\n", - (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down"); - seq_printf(m, "Media Power Well: %s\n", - (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - - print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); - - return fw_domains_show(m, NULL); -} - -static int gen6_drpc(struct seq_file *m) -{ - struct intel_gt *gt = m->private; - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; - u32 gt_core_status, rcctl1, rc6vids = 0; - u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; - - gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); - - rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); - if (GRAPHICS_VER(i915) >= 9) { - gen9_powergate_enable = - intel_uncore_read(uncore, GEN9_PG_ENABLE); - gen9_powergate_status = - intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS); - } - - if (GRAPHICS_VER(i915) <= 7) - sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, - &rc6vids, NULL); - - seq_printf(m, "RC1e Enabled: %s\n", - yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); - seq_printf(m, "RC6 Enabled: %s\n", - yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); - if (GRAPHICS_VER(i915) >= 9) { - seq_printf(m, "Render Well Gating Enabled: %s\n", - yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); - seq_printf(m, "Media Well Gating Enabled: %s\n", - yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); - } - seq_printf(m, "Deep RC6 Enabled: %s\n", - yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); - seq_printf(m, "Deepest RC6 Enabled: %s\n", - yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE)); - seq_puts(m, "Current RC state: "); - switch (gt_core_status & GEN6_RCn_MASK) { - case GEN6_RC0: - if (gt_core_status & GEN6_CORE_CPD_STATE_MASK) - seq_puts(m, "Core Power Down\n"); - else - seq_puts(m, "on\n"); - break; - case GEN6_RC3: - seq_puts(m, "RC3\n"); - break; - case GEN6_RC6: - seq_puts(m, "RC6\n"); - break; - case GEN6_RC7: - seq_puts(m, "RC7\n"); - break; - default: - seq_puts(m, "Unknown\n"); - break; - } - - seq_printf(m, "Core Power Down: %s\n", - yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); - if (GRAPHICS_VER(i915) >= 9) { - seq_printf(m, "Render Power Well: %s\n", - (gen9_powergate_status & - GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); - seq_printf(m, "Media Power Well: %s\n", - (gen9_powergate_status & - GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); - } - - /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); - - if (GRAPHICS_VER(i915) <= 7) { - seq_printf(m, "RC6 voltage: %dmV\n", - GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); - seq_printf(m, "RC6+ voltage: %dmV\n", - GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); - seq_printf(m, "RC6++ voltage: %dmV\n", - GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); - } - - return fw_domains_show(m, NULL); -} - -static int ilk_drpc(struct seq_file *m) -{ - struct intel_gt *gt = m->private; - struct intel_uncore *uncore = gt->uncore; - u32 rgvmodectl, rstdbyctl; - u16 crstandvid; - - rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); - rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL); - crstandvid = intel_uncore_read16(uncore, CRSTANDVID); - - seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); - seq_printf(m, "Boost freq: %d\n", - (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >> - MEMMODE_BOOST_FREQ_SHIFT); - seq_printf(m, "HW control enabled: %s\n", - yesno(rgvmodectl & MEMMODE_HWIDLE_EN)); - seq_printf(m, "SW control enabled: %s\n", - yesno(rgvmodectl & MEMMODE_SWMODE_EN)); - seq_printf(m, "Gated voltage change: %s\n", - yesno(rgvmodectl & MEMMODE_RCLK_GATE)); - seq_printf(m, "Starting frequency: P%d\n", - (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT); - seq_printf(m, "Max P-state: P%d\n", - (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT); - seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK)); - seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f)); - seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f)); - seq_printf(m, "Render standby enabled: %s\n", - yesno(!(rstdbyctl & RCX_SW_EXIT))); - seq_puts(m, "Current RS state: "); - switch (rstdbyctl & RSX_STATUS_MASK) { - case RSX_STATUS_ON: - seq_puts(m, "on\n"); - break; - case RSX_STATUS_RC1: - seq_puts(m, "RC1\n"); - break; - case RSX_STATUS_RC1E: - seq_puts(m, "RC1E\n"); - break; - case RSX_STATUS_RS1: - seq_puts(m, "RS1\n"); - break; - case RSX_STATUS_RS2: - seq_puts(m, "RS2 (RC6)\n"); - break; - case RSX_STATUS_RS3: - seq_puts(m, "RC3 (RC6+)\n"); - break; - default: - seq_puts(m, "unknown\n"); - break; - } - - return 0; -} - -static int drpc_show(struct seq_file *m, void *unused) -{ - struct intel_gt *gt = m->private; - struct drm_i915_private *i915 = gt->i915; - intel_wakeref_t wakeref; - int err = -ENODEV; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - err = vlv_drpc(m); - else if (GRAPHICS_VER(i915) >= 6) - err = gen6_drpc(m); - else - err = ilk_drpc(m); - } - - return err; -} -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc); - -static int frequency_show(struct seq_file *m, void *unused) -{ - struct intel_gt *gt = m->private; - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; - struct intel_rps *rps = >->rps; - intel_wakeref_t wakeref; - - wakeref = intel_runtime_pm_get(uncore->rpm); - - if (GRAPHICS_VER(i915) == 5) { - u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); - - seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); - seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); - seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> - MEMSTAT_VID_SHIFT); - seq_printf(m, "Current P-state: %d\n", - (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); - } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { - u32 rpmodectl, freq_sts; - - rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); - - vlv_punit_get(i915); - freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); - vlv_punit_put(i915); - - seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); - seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); - - seq_printf(m, "actual GPU freq: %d MHz\n", - intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); - - seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); - - seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); - - seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); - - seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); - - seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); - } else if (GRAPHICS_VER(i915) >= 6) { - u32 rp_state_limits; - u32 gt_perf_status; - u32 rp_state_cap; - u32 rpmodectl, rpinclimit, rpdeclimit; - u32 rpstat, cagf, reqf; - u32 rpcurupei, rpcurup, rpprevup; - u32 rpcurdownei, rpcurdown, rpprevdown; - u32 rpupei, rpupt, rpdownei, rpdownt; - u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; - int max_freq; - - rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - rp_state_cap = intel_rps_read_state_cap(rps); - if (IS_GEN9_LP(i915)) - gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - else - gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - - /* RPSTAT1 is in the GT power well */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); - if (GRAPHICS_VER(i915) >= 9) { - reqf >>= 23; - } else { - reqf &= ~GEN6_TURBO_DISABLE; - if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - reqf >>= 24; - else - reqf >>= 25; - } - reqf = intel_gpu_freq(rps, reqf); - - rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); - rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; - rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; - rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; - rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; - rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - - rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); - rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - - rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); - rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - cagf = intel_rps_read_actual_frequency(rps); - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - if (GRAPHICS_VER(i915) >= 11) { - pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); - /* - * The equivalent to the PM ISR & IIR cannot be read - * without affecting the current state of the system - */ - pm_isr = 0; - pm_iir = 0; - } else if (GRAPHICS_VER(i915) >= 8) { - pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); - pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); - pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); - pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); - } else { - pm_ier = intel_uncore_read(uncore, GEN6_PMIER); - pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); - pm_isr = intel_uncore_read(uncore, GEN6_PMISR); - pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); - } - pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); - - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); - - seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", - pm_ier, pm_imr, pm_mask); - if (GRAPHICS_VER(i915) <= 10) - seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", - pm_isr, pm_iir); - seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", - rps->pm_intrmsk_mbz); - seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); - seq_printf(m, "Render p-state ratio: %d\n", - (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); - seq_printf(m, "Render p-state VID: %d\n", - gt_perf_status & 0xff); - seq_printf(m, "Render p-state limit: %d\n", - rp_state_limits & 0xff); - seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); - seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); - seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); - seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); - seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); - seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", - rpcurupei, - intel_gt_pm_interval_to_ns(gt, rpcurupei)); - seq_printf(m, "RP CUR UP: %d (%lldns)\n", - rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%lldns)\n", - rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", - rps->power.up_threshold); - seq_printf(m, "RP UP EI: %d (%lldns)\n", - rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); - seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n", - rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); - - seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", - rpcurdownei, - intel_gt_pm_interval_to_ns(gt, rpcurdownei)); - seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", - rpcurdown, - intel_gt_pm_interval_to_ns(gt, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", - rpprevdown, - intel_gt_pm_interval_to_ns(gt, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", - rps->power.down_threshold); - seq_printf(m, "RP DOWN EI: %d (%lldns)\n", - rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); - seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n", - rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); - - max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : - rp_state_cap >> 16) & 0xff; - max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - - max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - - max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : - rp_state_cap >> 0) & 0xff; - max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(rps, rps->max_freq)); - - seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); - seq_printf(m, "Actual freq: %d MHz\n", cagf); - seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); - seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); - seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, - "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); - } else { - seq_puts(m, "no P-state info available\n"); - } - - seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); - seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); - seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); - - intel_runtime_pm_put(uncore->rpm, wakeref); - - return 0; -} -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency); - -static int llc_show(struct seq_file *m, void *data) -{ - struct intel_gt *gt = m->private; - struct drm_i915_private *i915 = gt->i915; - const bool edram = GRAPHICS_VER(i915) > 8; - struct intel_rps *rps = >->rps; - unsigned int max_gpu_freq, min_gpu_freq; - intel_wakeref_t wakeref; - int gpu_freq, ia_freq; - - seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915))); - seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC", - i915->edram_size_mb); - - min_gpu_freq = rps->min_freq; - max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { - /* Convert GT frequency to 50 HZ units */ - min_gpu_freq /= GEN9_FREQ_SCALER; - max_gpu_freq /= GEN9_FREQ_SCALER; - } - - seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); - - wakeref = intel_runtime_pm_get(gt->uncore->rpm); - for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { - ia_freq = gpu_freq; - sandybridge_pcode_read(i915, - GEN6_PCODE_READ_MIN_FREQ_TABLE, - &ia_freq, NULL); - seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", - intel_gpu_freq(rps, - (gpu_freq * - (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? - GEN9_FREQ_SCALER : 1))), - ((ia_freq >> 0) & 0xff) * 100, - ((ia_freq >> 8) & 0xff) * 100); - } - intel_runtime_pm_put(gt->uncore->rpm, wakeref); - - return 0; -} - -static bool llc_eval(void *data) -{ - struct intel_gt *gt = data; - - return HAS_LLC(gt->i915); -} - -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc); - -static const char *rps_power_to_str(unsigned int power) -{ - static const char * const strings[] = { - [LOW_POWER] = "low power", - [BETWEEN] = "mixed", - [HIGH_POWER] = "high power", - }; - - if (power >= ARRAY_SIZE(strings) || !strings[power]) - return "unknown"; - - return strings[power]; -} - -static int rps_boost_show(struct seq_file *m, void *data) -{ - struct intel_gt *gt = m->private; - struct drm_i915_private *i915 = gt->i915; - struct intel_rps *rps = >->rps; - - seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); - seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); - seq_printf(m, "GPU busy? %s, %llums\n", - yesno(gt->awake), - ktime_to_ms(intel_gt_get_awake_time(gt))); - seq_printf(m, "Boosts outstanding? %d\n", - atomic_read(&rps->num_waiters)); - seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); - seq_printf(m, "Frequency requested %d, actual %d\n", - intel_gpu_freq(rps, rps->cur_freq), - intel_rps_read_actual_frequency(rps)); - seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(rps, rps->min_freq), - intel_gpu_freq(rps, rps->min_freq_softlimit), - intel_gpu_freq(rps, rps->max_freq_softlimit), - intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(rps, rps->idle_freq), - intel_gpu_freq(rps, rps->efficient_freq), - intel_gpu_freq(rps, rps->boost_freq)); - - seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); - - if (GRAPHICS_VER(i915) >= 6 && intel_rps_is_active(rps)) { - struct intel_uncore *uncore = gt->uncore; - u32 rpup, rpupei; - u32 rpdown, rpdownei; - - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; - rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; - rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; - rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", - rps_power_to_str(rps->power.mode)); - seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", - rpup && rpupei ? 100 * rpup / rpupei : 0, - rps->power.up_threshold); - seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", - rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, - rps->power.down_threshold); - } else { - seq_puts(m, "\nRPS Autotuning inactive\n"); - } - - return 0; -} - -static bool rps_eval(void *data) -{ - struct intel_gt *gt = data; - - return HAS_RPS(gt->i915); -} - -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); - -void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) -{ - static const struct intel_gt_debugfs_file files[] = { - { "drpc", &drpc_fops, NULL }, - { "frequency", &frequency_fops, NULL }, - { "forcewake", &fw_domains_fops, NULL }, - { "llc", &llc_fops, llc_eval }, - { "rps_boost", &rps_boost_fops, rps_eval }, - }; - - intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); -} diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h deleted file mode 100644 index 4cf5f5c9da7d..000000000000 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef DEBUGFS_GT_PM_H -#define DEBUGFS_GT_PM_H - -struct intel_gt; -struct dentry; - -void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root); - -#endif /* DEBUGFS_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index 3c77c2965e19..03fb4aefbf90 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -5,10 +5,10 @@ #include -#include "debugfs_gt_pm.h" #include "i915_drv.h" #include "intel_gt_debugfs.h" #include "intel_gt_engines_debugfs.h" +#include "intel_gt_pm_debugfs.h" #include "intel_sseu_debugfs.h" #include "uc/intel_uc_debugfs.h" @@ -24,7 +24,7 @@ void intel_gt_debugfs_register(struct intel_gt *gt) return; intel_gt_engines_debugfs_register(gt, root); - debugfs_gt_pm_register(gt, root); + intel_gt_pm_debugfs_register(gt, root); intel_sseu_debugfs_register(gt, root); intel_uc_debugfs_register(>->uc, root); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c new file mode 100644 index 000000000000..250467108eda --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -0,0 +1,628 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_clock_utils.h" +#include "intel_gt_debugfs.h" +#include "intel_gt_pm.h" +#include "intel_gt_pm_debugfs.h" +#include "intel_llc.h" +#include "intel_rc6.h" +#include "intel_rps.h" +#include "intel_runtime_pm.h" +#include "intel_sideband.h" +#include "intel_uncore.h" + +static int fw_domains_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + struct intel_uncore_forcewake_domain *fw_domain; + unsigned int tmp; + + seq_printf(m, "user.bypass_count = %u\n", + uncore->user_forcewake_count); + + for_each_fw_domain(fw_domain, uncore, tmp) + seq_printf(m, "%s.wake_count = %u\n", + intel_uncore_forcewake_domain_to_str(fw_domain->id), + READ_ONCE(fw_domain->wake_count)); + + return 0; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); + +static void print_rc6_res(struct seq_file *m, + const char *title, + const i915_reg_t reg) +{ + struct intel_gt *gt = m->private; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(gt->uncore, reg), + intel_rc6_residency_us(>->rc6, reg)); +} + +static int vlv_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rcctl1, pw_status; + + pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_EI_MODE(1)))); + seq_printf(m, "Render Power Well: %s\n", + (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); + + print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6); + print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + + return fw_domains_show(m, NULL); +} + +static int gen6_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; + + gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + if (GRAPHICS_VER(i915) >= 9) { + gen9_powergate_enable = + intel_uncore_read(uncore, GEN9_PG_ENABLE); + gen9_powergate_status = + intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS); + } + + if (GRAPHICS_VER(i915) <= 7) + sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + + seq_printf(m, "RC1e Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); + seq_printf(m, "RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (GRAPHICS_VER(i915) >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } + seq_printf(m, "Deep RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); + seq_printf(m, "Deepest RC6 Enabled: %s\n", + yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE)); + seq_puts(m, "Current RC state: "); + switch (gt_core_status & GEN6_RCn_MASK) { + case GEN6_RC0: + if (gt_core_status & GEN6_CORE_CPD_STATE_MASK) + seq_puts(m, "Core Power Down\n"); + else + seq_puts(m, "on\n"); + break; + case GEN6_RC3: + seq_puts(m, "RC3\n"); + break; + case GEN6_RC6: + seq_puts(m, "RC6\n"); + break; + case GEN6_RC7: + seq_puts(m, "RC7\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Core Power Down: %s\n", + yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (GRAPHICS_VER(i915) >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + } + + /* Not exactly sure what this is */ + print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", + GEN6_GT_GFX_RC6_LOCKED); + print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); + print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); + print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + + if (GRAPHICS_VER(i915) <= 7) { + seq_printf(m, "RC6 voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); + seq_printf(m, "RC6+ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); + seq_printf(m, "RC6++ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); + } + + return fw_domains_show(m, NULL); +} + +static int ilk_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 rgvmodectl, rstdbyctl; + u16 crstandvid; + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL); + crstandvid = intel_uncore_read16(uncore, CRSTANDVID); + + seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); + seq_printf(m, "Boost freq: %d\n", + (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >> + MEMMODE_BOOST_FREQ_SHIFT); + seq_printf(m, "HW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_HWIDLE_EN)); + seq_printf(m, "SW control enabled: %s\n", + yesno(rgvmodectl & MEMMODE_SWMODE_EN)); + seq_printf(m, "Gated voltage change: %s\n", + yesno(rgvmodectl & MEMMODE_RCLK_GATE)); + seq_printf(m, "Starting frequency: P%d\n", + (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT); + seq_printf(m, "Max P-state: P%d\n", + (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT); + seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK)); + seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f)); + seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f)); + seq_printf(m, "Render standby enabled: %s\n", + yesno(!(rstdbyctl & RCX_SW_EXIT))); + seq_puts(m, "Current RS state: "); + switch (rstdbyctl & RSX_STATUS_MASK) { + case RSX_STATUS_ON: + seq_puts(m, "on\n"); + break; + case RSX_STATUS_RC1: + seq_puts(m, "RC1\n"); + break; + case RSX_STATUS_RC1E: + seq_puts(m, "RC1E\n"); + break; + case RSX_STATUS_RS1: + seq_puts(m, "RS1\n"); + break; + case RSX_STATUS_RS2: + seq_puts(m, "RS2 (RC6)\n"); + break; + case RSX_STATUS_RS3: + seq_puts(m, "RC3 (RC6+)\n"); + break; + default: + seq_puts(m, "unknown\n"); + break; + } + + return 0; +} + +static int drpc_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + intel_wakeref_t wakeref; + int err = -ENODEV; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_drpc(m); + else if (GRAPHICS_VER(i915) >= 6) + err = gen6_drpc(m); + else + err = ilk_drpc(m); + } + + return err; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc); + +static int frequency_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_rps *rps = >->rps; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(uncore->rpm); + + if (GRAPHICS_VER(i915) == 5) { + u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); + + seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); + seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); + seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> + MEMSTAT_VID_SHIFT); + seq_printf(m, "Current P-state: %d\n", + (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + u32 rpmodectl, freq_sts; + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + vlv_punit_get(i915); + freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); + seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); + + seq_printf(m, "actual GPU freq: %d MHz\n", + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); + + seq_printf(m, "current GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + + seq_printf(m, "max GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "min GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + + seq_printf(m, "idle GPU freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + + seq_printf(m, "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else if (GRAPHICS_VER(i915) >= 6) { + u32 rp_state_limits; + u32 gt_perf_status; + u32 rp_state_cap; + u32 rpmodectl, rpinclimit, rpdeclimit; + u32 rpstat, cagf, reqf; + u32 rpcurupei, rpcurup, rpprevup; + u32 rpcurdownei, rpcurdown, rpprevdown; + u32 rpupei, rpupt, rpdownei, rpdownt; + u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; + int max_freq; + + rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(i915)) + gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); + else + gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); + + /* RPSTAT1 is in the GT power well */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); + if (GRAPHICS_VER(i915) >= 9) { + reqf >>= 23; + } else { + reqf &= ~GEN6_TURBO_DISABLE; + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + reqf >>= 24; + else + reqf >>= 25; + } + reqf = intel_gpu_freq(rps, reqf); + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; + rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; + rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; + rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + + rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); + rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + + rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); + rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + cagf = intel_rps_read_actual_frequency(rps); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + if (GRAPHICS_VER(i915) >= 11) { + pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); + pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); + /* + * The equivalent to the PM ISR & IIR cannot be read + * without affecting the current state of the system + */ + pm_isr = 0; + pm_iir = 0; + } else if (GRAPHICS_VER(i915) >= 8) { + pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); + pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); + pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); + pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); + } else { + pm_ier = intel_uncore_read(uncore, GEN6_PMIER); + pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); + pm_isr = intel_uncore_read(uncore, GEN6_PMISR); + pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); + } + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + + seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + pm_ier, pm_imr, pm_mask); + if (GRAPHICS_VER(i915) <= 10) + seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", + pm_isr, pm_iir); + seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + seq_printf(m, "Render p-state ratio: %d\n", + (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + seq_printf(m, "Render p-state VID: %d\n", + gt_perf_status & 0xff); + seq_printf(m, "Render p-state limit: %d\n", + rp_state_limits & 0xff); + seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); + seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); + seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); + seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); + seq_printf(m, "CAGF: %dMHz\n", cagf); + seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", + rpcurupei, + intel_gt_pm_interval_to_ns(gt, rpcurupei)); + seq_printf(m, "RP CUR UP: %d (%lldns)\n", + rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%lldns)\n", + rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); + seq_printf(m, "Up threshold: %d%%\n", + rps->power.up_threshold); + seq_printf(m, "RP UP EI: %d (%lldns)\n", + rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); + seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n", + rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); + + seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", + rpcurdownei, + intel_gt_pm_interval_to_ns(gt, rpcurdownei)); + seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", + rpcurdown, + intel_gt_pm_interval_to_ns(gt, rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", + rpprevdown, + intel_gt_pm_interval_to_ns(gt, rpprevdown)); + seq_printf(m, "Down threshold: %d%%\n", + rps->power.down_threshold); + seq_printf(m, "RP DOWN EI: %d (%lldns)\n", + rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); + seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n", + rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : + rp_state_cap >> 16) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (rp_state_cap & 0xff00) >> 8; + max_freq *= (IS_GEN9_BC(i915) || + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + + max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : + rp_state_cap >> 0) & 0xff; + max_freq *= (IS_GEN9_BC(i915) || + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); + seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, max_freq)); + seq_printf(m, "Max overclocked frequency: %dMHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + seq_printf(m, "Current freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + seq_printf(m, "Actual freq: %d MHz\n", cagf); + seq_printf(m, "Idle freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + seq_printf(m, "Min freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(rps, rps->boost_freq)); + seq_printf(m, "Max freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); + } else { + seq_puts(m, "no P-state info available\n"); + } + + seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); + seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); + + intel_runtime_pm_put(uncore->rpm, wakeref); + + return 0; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency); + +static int llc_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + const bool edram = GRAPHICS_VER(i915) > 8; + struct intel_rps *rps = >->rps; + unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; + int gpu_freq, ia_freq; + + seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915))); + seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC", + i915->edram_size_mb); + + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { + /* Convert GT frequency to 50 HZ units */ + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; + } + + seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { + ia_freq = gpu_freq; + sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq, NULL); + seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(i915) || + GRAPHICS_VER(i915) >= 11 ? + GEN9_FREQ_SCALER : 1))), + ((ia_freq >> 0) & 0xff) * 100, + ((ia_freq >> 8) & 0xff) * 100); + } + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return 0; +} + +static bool llc_eval(void *data) +{ + struct intel_gt *gt = data; + + return HAS_LLC(gt->i915); +} + +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc); + +static const char *rps_power_to_str(unsigned int power) +{ + static const char * const strings[] = { + [LOW_POWER] = "low power", + [BETWEEN] = "mixed", + [HIGH_POWER] = "high power", + }; + + if (power >= ARRAY_SIZE(strings) || !strings[power]) + return "unknown"; + + return strings[power]; +} + +static int rps_boost_show(struct seq_file *m, void *data) +{ + struct intel_gt *gt = m->private; + struct drm_i915_private *i915 = gt->i915; + struct intel_rps *rps = >->rps; + + seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); + seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); + seq_printf(m, "GPU busy? %s, %llums\n", + yesno(gt->awake), + ktime_to_ms(intel_gt_get_awake_time(gt))); + seq_printf(m, "Boosts outstanding? %d\n", + atomic_read(&rps->num_waiters)); + seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); + seq_printf(m, "Frequency requested %d, actual %d\n", + intel_gpu_freq(rps, rps->cur_freq), + intel_rps_read_actual_frequency(rps)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); + seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); + + seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); + + if (GRAPHICS_VER(i915) >= 6 && intel_rps_is_active(rps)) { + struct intel_uncore *uncore = gt->uncore; + u32 rpup, rpupei; + u32 rpdown, rpdownei; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; + rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; + rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; + rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", + rps_power_to_str(rps->power.mode)); + seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", + rpup && rpupei ? 100 * rpup / rpupei : 0, + rps->power.up_threshold); + seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, + rps->power.down_threshold); + } else { + seq_puts(m, "\nRPS Autotuning inactive\n"); + } + + return 0; +} + +static bool rps_eval(void *data) +{ + struct intel_gt *gt = data; + + return HAS_RPS(gt->i915); +} + +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); + +void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct intel_gt_debugfs_file files[] = { + { "drpc", &drpc_fops, NULL }, + { "frequency", &frequency_fops, NULL }, + { "forcewake", &fw_domains_fops, NULL }, + { "llc", &llc_fops, llc_eval }, + { "rps_boost", &rps_boost_fops, rps_eval }, + }; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h new file mode 100644 index 000000000000..b5c6a00cfa04 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_DEBUGFS_H +#define INTEL_GT_PM_DEBUGFS_H + +struct intel_gt; +struct dentry; + +void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root); + +#endif /* INTEL_GT_PM_DEBUGFS_H */ -- cgit v1.2.3-70-g09d2 From d0c560316d6fc7a2189bbb7acba929e81beabd01 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Sep 2021 19:57:54 -0700 Subject: drm/i915: deduplicate frequency dump on debugfs Although commit 9dd4b065446a ("drm/i915/gt: Move pm debug files into a gt aware debugfs") says it was moving debug files to gt/, the i915_frequency_info file was left behind and its implementation copied into drivers/gpu/drm/i915/gt/debugfs_gt_pm.c. Over time we had several patches having to change both places to keep them in sync (and some patches failing to do so). The initial idea was to remove i915_frequency_info, but there are user space tools using it. From a quick code search there are other scripts and test tools besides igt, so it's not simply updating igt to get rid of the older file. Here we export a function using drm_printer as parameter and make both show() implementations to call this same function. Aside from a few variable name differences, for i915_frequency_info this brings a few lines that were not previously printed: RP UP EI, RP UP THRESHOLD, RP DOWN THRESHOLD and RP DOWN EI. These came in as part of commit 9c878557b1eb ("drm/i915/gt: Use the RPM config register to determine clk frequencies"), which didn't change both places. Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210918025754.1254705-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 127 +++++++------- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h | 2 + drivers/gpu/drm/i915/i915_debugfs.c | 231 +------------------------- 3 files changed, 76 insertions(+), 284 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 250467108eda..5f84ad602642 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -240,9 +240,8 @@ static int drpc_show(struct seq_file *m, void *unused) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc); -static int frequency_show(struct seq_file *m, void *unused) +void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) { - struct intel_gt *gt = m->private; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_rps *rps = >->rps; @@ -254,21 +253,21 @@ static int frequency_show(struct seq_file *m, void *unused) u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); - seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); - seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); - seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> + drm_printf(p, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); + drm_printf(p, "Requested VID: %d\n", rgvswctl & 0x3f); + drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> MEMSTAT_VID_SHIFT); - seq_printf(m, "Current P-state: %d\n", + drm_printf(p, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { u32 rpmodectl, freq_sts; rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", + drm_printf(p, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", + drm_printf(p, "HW control enabled: %s\n", yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", + drm_printf(p, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); @@ -276,25 +275,25 @@ static int frequency_show(struct seq_file *m, void *unused) freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); - seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); - seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq); + drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); + drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq); - seq_printf(m, "actual GPU freq: %d MHz\n", + drm_printf(p, "actual GPU freq: %d MHz\n", intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); - seq_printf(m, "current GPU freq: %d MHz\n", + drm_printf(p, "current GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->cur_freq)); - seq_printf(m, "max GPU freq: %d MHz\n", + drm_printf(p, "max GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, "min GPU freq: %d MHz\n", + drm_printf(p, "min GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->min_freq)); - seq_printf(m, "idle GPU freq: %d MHz\n", + drm_printf(p, "idle GPU freq: %d MHz\n", intel_gpu_freq(rps, rps->idle_freq)); - seq_printf(m, "efficient (RPe) frequency: %d MHz\n", + drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else if (GRAPHICS_VER(i915) >= 6) { u32 rp_state_limits; @@ -374,109 +373,117 @@ static int frequency_show(struct seq_file *m, void *unused) } pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); - seq_printf(m, "Video Turbo Mode: %s\n", + drm_printf(p, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", + drm_printf(p, "HW control enabled: %s\n", yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", + drm_printf(p, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); - seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_mask); if (GRAPHICS_VER(i915) <= 10) - seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", + drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", pm_isr, pm_iir); - seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); - seq_printf(m, "Render p-state ratio: %d\n", + drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + drm_printf(p, "Render p-state ratio: %d\n", (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); - seq_printf(m, "Render p-state VID: %d\n", + drm_printf(p, "Render p-state VID: %d\n", gt_perf_status & 0xff); - seq_printf(m, "Render p-state limit: %d\n", + drm_printf(p, "Render p-state limit: %d\n", rp_state_limits & 0xff); - seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); - seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); - seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); - seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); - seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); - seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", + drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); + drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); + drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); + drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); + drm_printf(p, "CAGF: %dMHz\n", cagf); + drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", rpcurupei, intel_gt_pm_interval_to_ns(gt, rpcurupei)); - seq_printf(m, "RP CUR UP: %d (%lldns)\n", + drm_printf(p, "RP CUR UP: %d (%lldns)\n", rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%lldns)\n", + drm_printf(p, "RP PREV UP: %d (%lldns)\n", rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", + drm_printf(p, "Up threshold: %d%%\n", rps->power.up_threshold); - seq_printf(m, "RP UP EI: %d (%lldns)\n", + drm_printf(p, "RP UP EI: %d (%lldns)\n", rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); - seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n", + drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); - seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", + drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", rpcurdownei, intel_gt_pm_interval_to_ns(gt, rpcurdownei)); - seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", + drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", rpcurdown, intel_gt_pm_interval_to_ns(gt, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", + drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", rpprevdown, intel_gt_pm_interval_to_ns(gt, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", + drm_printf(p, "Down threshold: %d%%\n", rps->power.down_threshold); - seq_printf(m, "RP DOWN EI: %d (%lldns)\n", + drm_printf(p, "RP DOWN EI: %d (%lldns)\n", rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); - seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n", + drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", + drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", + drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", + drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); - seq_printf(m, "Max overclocked frequency: %dMHz\n", + drm_printf(p, "Max overclocked frequency: %dMHz\n", intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, "Current freq: %d MHz\n", + drm_printf(p, "Current freq: %d MHz\n", intel_gpu_freq(rps, rps->cur_freq)); - seq_printf(m, "Actual freq: %d MHz\n", cagf); - seq_printf(m, "Idle freq: %d MHz\n", + drm_printf(p, "Actual freq: %d MHz\n", cagf); + drm_printf(p, "Idle freq: %d MHz\n", intel_gpu_freq(rps, rps->idle_freq)); - seq_printf(m, "Min freq: %d MHz\n", + drm_printf(p, "Min freq: %d MHz\n", intel_gpu_freq(rps, rps->min_freq)); - seq_printf(m, "Boost freq: %d MHz\n", + drm_printf(p, "Boost freq: %d MHz\n", intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Max freq: %d MHz\n", + drm_printf(p, "Max freq: %d MHz\n", intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, + drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else { - seq_puts(m, "no P-state info available\n"); + drm_puts(p, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); - seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); - seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); + drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); + drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); intel_runtime_pm_put(uncore->rpm, wakeref); +} + +static int frequency_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + intel_gt_pm_frequency_dump(gt, &p); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h index b5c6a00cfa04..2b824289582b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h @@ -8,7 +8,9 @@ struct intel_gt; struct dentry; +struct drm_printer; void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root); +void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m); #endif /* INTEL_GT_PM_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 04351a851586..9f2cfcf7aa6a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,13 +32,14 @@ #include #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_clock_utils.h" -#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_pm_debugfs.h" #include "gt/intel_gt_requests.h" -#include "gt/intel_reset.h" #include "gt/intel_rc6.h" +#include "gt/intel_reset.h" #include "gt/intel_rps.h" #include "gt/intel_sseu_debugfs.h" @@ -354,230 +355,12 @@ static const struct file_operations i915_error_state_fops = { static int i915_frequency_info(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uncore *uncore = &dev_priv->uncore; - struct intel_rps *rps = &dev_priv->gt.rps; - intel_wakeref_t wakeref; - - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - if (GRAPHICS_VER(dev_priv) == 5) { - u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK); - - seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf); - seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f); - seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> - MEMSTAT_VID_SHIFT); - seq_printf(m, "Current P-state: %d\n", - (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 rpmodectl, freq_sts; - - rpmodectl = intel_uncore_read(&dev_priv->uncore, GEN6_RP_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); - - vlv_punit_get(dev_priv); - freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - vlv_punit_put(dev_priv); - - seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); - seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); - - seq_printf(m, "actual GPU freq: %d MHz\n", - intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); - - seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); - - seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); - - seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); - - seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); - - seq_printf(m, - "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); - } else if (GRAPHICS_VER(dev_priv) >= 6) { - u32 rp_state_limits; - u32 gt_perf_status; - u32 rp_state_cap; - u32 rpmodectl, rpinclimit, rpdeclimit; - u32 rpstat, cagf, reqf; - u32 rpupei, rpcurup, rpprevup; - u32 rpdownei, rpcurdown, rpprevdown; - u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; - int max_freq; - - rp_state_limits = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_LIMITS); - rp_state_cap = intel_rps_read_state_cap(rps); - if (IS_GEN9_LP(dev_priv)) - gt_perf_status = intel_uncore_read(&dev_priv->uncore, BXT_GT_PERF_STATUS); - else - gt_perf_status = intel_uncore_read(&dev_priv->uncore, GEN6_GT_PERF_STATUS); - - /* RPSTAT1 is in the GT power well */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - reqf = intel_uncore_read(&dev_priv->uncore, GEN6_RPNSWREQ); - if (GRAPHICS_VER(dev_priv) >= 9) - reqf >>= 23; - else { - reqf &= ~GEN6_TURBO_DISABLE; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - reqf >>= 24; - else - reqf >>= 25; - } - reqf = intel_gpu_freq(rps, reqf); - - rpmodectl = intel_uncore_read(&dev_priv->uncore, GEN6_RP_CONTROL); - rpinclimit = intel_uncore_read(&dev_priv->uncore, GEN6_RP_UP_THRESHOLD); - rpdeclimit = intel_uncore_read(&dev_priv->uncore, GEN6_RP_DOWN_THRESHOLD); - - rpstat = intel_uncore_read(&dev_priv->uncore, GEN6_RPSTAT1); - rpupei = intel_uncore_read(&dev_priv->uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; - rpcurup = intel_uncore_read(&dev_priv->uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; - rpprevup = intel_uncore_read(&dev_priv->uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpdownei = intel_uncore_read(&dev_priv->uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; - rpcurdown = intel_uncore_read(&dev_priv->uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; - rpprevdown = intel_uncore_read(&dev_priv->uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - cagf = intel_rps_read_actual_frequency(rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - - if (GRAPHICS_VER(dev_priv) >= 11) { - pm_ier = intel_uncore_read(&dev_priv->uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - pm_imr = intel_uncore_read(&dev_priv->uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); - /* - * The equivalent to the PM ISR & IIR cannot be read - * without affecting the current state of the system - */ - pm_isr = 0; - pm_iir = 0; - } else if (GRAPHICS_VER(dev_priv) >= 8) { - pm_ier = intel_uncore_read(&dev_priv->uncore, GEN8_GT_IER(2)); - pm_imr = intel_uncore_read(&dev_priv->uncore, GEN8_GT_IMR(2)); - pm_isr = intel_uncore_read(&dev_priv->uncore, GEN8_GT_ISR(2)); - pm_iir = intel_uncore_read(&dev_priv->uncore, GEN8_GT_IIR(2)); - } else { - pm_ier = intel_uncore_read(&dev_priv->uncore, GEN6_PMIER); - pm_imr = intel_uncore_read(&dev_priv->uncore, GEN6_PMIMR); - pm_isr = intel_uncore_read(&dev_priv->uncore, GEN6_PMISR); - pm_iir = intel_uncore_read(&dev_priv->uncore, GEN6_PMIIR); - } - pm_mask = intel_uncore_read(&dev_priv->uncore, GEN6_PMINTRMSK); - - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); - - seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", - pm_ier, pm_imr, pm_mask); - if (GRAPHICS_VER(dev_priv) <= 10) - seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n", - pm_isr, pm_iir); - seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", - rps->pm_intrmsk_mbz); - seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); - seq_printf(m, "Render p-state ratio: %d\n", - (gt_perf_status & (GRAPHICS_VER(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8); - seq_printf(m, "Render p-state VID: %d\n", - gt_perf_status & 0xff); - seq_printf(m, "Render p-state limit: %d\n", - rp_state_limits & 0xff); - seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat); - seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl); - seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit); - seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); - seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); - seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", - rpupei, - intel_gt_pm_interval_to_ns(&dev_priv->gt, rpupei)); - seq_printf(m, "RP CUR UP: %d (%lldun)\n", - rpcurup, - intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%lldns)\n", - rpprevup, - intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", - rps->power.up_threshold); - - seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", - rpdownei, - intel_gt_pm_interval_to_ns(&dev_priv->gt, - rpdownei)); - seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", - rpcurdown, - intel_gt_pm_interval_to_ns(&dev_priv->gt, - rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", - rpprevdown, - intel_gt_pm_interval_to_ns(&dev_priv->gt, - rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", - rps->power.down_threshold); - - max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : - rp_state_cap >> 16) & 0xff; - max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - - max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - - max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : - rp_state_cap >> 0) & 0xff; - max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); - seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(rps, rps->max_freq)); - - seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); - seq_printf(m, "Actual freq: %d MHz\n", cagf); - seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); - seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); - seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); - seq_printf(m, - "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); - } else { - seq_puts(m, "no P-state info available\n"); - } + struct drm_i915_private *i915 = node_to_i915(m->private); + struct intel_gt *gt = &i915->gt; + struct drm_printer p = drm_seq_file_printer(m); - seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk); - seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); - seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); + intel_gt_pm_frequency_dump(gt, &p); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return 0; } -- cgit v1.2.3-70-g09d2 From d0a652493abd86180ad0cc0ed44427831d37fabe Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Fri, 17 Sep 2021 10:08:45 -0700 Subject: drm/i915: Make wa list per-gt Support for multiple GT's within a single i915 device will be arriving soon. Since each GT may have its own fusing and require different workarounds, we need to make the GT workaround functions and multicast steering setup per-gt. Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210917170845.836358-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 3 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 141 +++++++++++++------------ drivers/gpu/drm/i915/gt/intel_workarounds.h | 2 +- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 2 - drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/i915_gem.c | 2 - 8 files changed, 81 insertions(+), 75 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 985922c03297..0c99424bf810 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -660,6 +660,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) return err; + intel_gt_init_workarounds(gt); + /* * This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs @@ -767,6 +769,7 @@ void intel_gt_driver_release(struct intel_gt *gt) if (vm) /* FIXME being called twice on error paths :( */ i915_vm_put(vm); + intel_wa_list_free(>->wa_list); intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); intel_gt_fini_buffer_pool(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 6fdcde64c180..ce127cae9e49 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -72,6 +72,8 @@ struct intel_gt { struct intel_uc uc; + struct i915_wa_list wa_list; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c314d4917b6b..e1f362530889 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -804,7 +804,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } static void -gen4_gt_workarounds_init(struct drm_i915_private *i915, +gen4_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ @@ -812,29 +812,29 @@ gen4_gt_workarounds_init(struct drm_i915_private *i915, } static void -g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen4_gt_workarounds_init(i915, wal); + gen4_gt_workarounds_init(gt, wal); /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); } static void -ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - g4x_gt_workarounds_init(i915, wal); + g4x_gt_workarounds_init(gt, wal); wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); } static void -snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { } static void -ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ wa_masked_dis(wal, @@ -850,7 +850,7 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* WaForceL3Serialization:vlv */ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); @@ -863,7 +863,7 @@ vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* L3 caching of data atomics doesn't work -- disable it. */ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); @@ -878,8 +878,10 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = gt->i915; + /* WaDisableKillLogic:bxt,skl,kbl */ if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) wa_write_or(wal, @@ -904,9 +906,9 @@ gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal } static void -skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); /* WaDisableGafsUnitClkGating:skl */ wa_write_or(wal, @@ -914,19 +916,19 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0)) + if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } static void -kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(i915, 0, STEP_C0)) + if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -943,15 +945,15 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); } static void -cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); /* WaDisableGafsUnitClkGating:cfl */ wa_write_or(wal, @@ -976,21 +978,21 @@ static void __set_mcr_steering(struct i915_wa_list *wal, wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); } -static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, +static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, unsigned int slice, unsigned int subslice) { - drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); + drm_dbg(>->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); } static void -icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { - const struct sseu_dev_info *sseu = &i915->gt.info.sseu; + const struct sseu_dev_info *sseu = >->info.sseu; unsigned int slice, subslice; - GEM_BUG_ON(GRAPHICS_VER(i915) < 11); + GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11); GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); slice = 0; @@ -1010,16 +1012,15 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * then we can just rely on the default steering and won't need to * worry about explicitly re-steering L3BANK reads later. */ - if (i915->gt.info.l3bank_mask & BIT(subslice)) - i915->gt.steering_table[L3BANK] = NULL; + if (gt->info.l3bank_mask & BIT(subslice)) + gt->steering_table[L3BANK] = NULL; - __add_mcr_wa(i915, wal, slice, subslice); + __add_mcr_wa(gt, wal, slice, subslice); } static void xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = gt->i915; const struct sseu_dev_info *sseu = >->info.sseu; unsigned long slice, subslice = 0, slice_mask = 0; u64 dss_mask = 0; @@ -1083,7 +1084,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) WARN_ON(subslice > GEN_DSS_PER_GSLICE); WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); - __add_mcr_wa(i915, wal, slice, subslice); + __add_mcr_wa(gt, wal, slice, subslice); /* * SQIDI ranges are special because they use different steering @@ -1099,9 +1100,11 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } static void -icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - icl_wa_init_mcr(i915, wal); + struct drm_i915_private *i915 = gt->i915; + + icl_wa_init_mcr(gt, wal); /* WaModifyGamTlbPartitioning:icl */ wa_write_clr_set(wal, @@ -1152,10 +1155,9 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) * the engine-specific workaround list. */ static void -wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal) +wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal) { struct intel_engine_cs *engine; - struct intel_gt *gt = &i915->gt; int id; for_each_engine(engine, gt, id) { @@ -1169,22 +1171,23 @@ wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -gen12_gt_workarounds_init(struct drm_i915_private *i915, - struct i915_wa_list *wal) +gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - icl_wa_init_mcr(i915, wal); + icl_wa_init_mcr(gt, wal); /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */ - wa_14011060649(i915, wal); + wa_14011060649(gt, wal); /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void -tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen12_gt_workarounds_init(i915, wal); + struct drm_i915_private *i915 = gt->i915; + + gen12_gt_workarounds_init(gt, wal); /* Wa_1409420604:tgl */ if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) @@ -1205,9 +1208,11 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - gen12_gt_workarounds_init(i915, wal); + struct drm_i915_private *i915 = gt->i915; + + gen12_gt_workarounds_init(gt, wal); /* Wa_1607087056:dg1 */ if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) @@ -1229,60 +1234,62 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - xehp_init_mcr(&i915->gt, wal); + xehp_init_mcr(gt, wal); } static void -gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) +gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = gt->i915; + if (IS_XEHPSDV(i915)) - xehpsdv_gt_workarounds_init(i915, wal); + xehpsdv_gt_workarounds_init(gt, wal); else if (IS_DG1(i915)) - dg1_gt_workarounds_init(i915, wal); + dg1_gt_workarounds_init(gt, wal); else if (IS_TIGERLAKE(i915)) - tgl_gt_workarounds_init(i915, wal); + tgl_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 12) - gen12_gt_workarounds_init(i915, wal); + gen12_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 11) - icl_gt_workarounds_init(i915, wal); + icl_gt_workarounds_init(gt, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - cfl_gt_workarounds_init(i915, wal); + cfl_gt_workarounds_init(gt, wal); else if (IS_GEMINILAKE(i915)) - glk_gt_workarounds_init(i915, wal); + glk_gt_workarounds_init(gt, wal); else if (IS_KABYLAKE(i915)) - kbl_gt_workarounds_init(i915, wal); + kbl_gt_workarounds_init(gt, wal); else if (IS_BROXTON(i915)) - gen9_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(gt, wal); else if (IS_SKYLAKE(i915)) - skl_gt_workarounds_init(i915, wal); + skl_gt_workarounds_init(gt, wal); else if (IS_HASWELL(i915)) - hsw_gt_workarounds_init(i915, wal); + hsw_gt_workarounds_init(gt, wal); else if (IS_VALLEYVIEW(i915)) - vlv_gt_workarounds_init(i915, wal); + vlv_gt_workarounds_init(gt, wal); else if (IS_IVYBRIDGE(i915)) - ivb_gt_workarounds_init(i915, wal); + ivb_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 6) - snb_gt_workarounds_init(i915, wal); + snb_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 5) - ilk_gt_workarounds_init(i915, wal); + ilk_gt_workarounds_init(gt, wal); else if (IS_G4X(i915)) - g4x_gt_workarounds_init(i915, wal); + g4x_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 4) - gen4_gt_workarounds_init(i915, wal); + gen4_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) <= 8) ; else MISSING_CASE(GRAPHICS_VER(i915)); } -void intel_gt_init_workarounds(struct drm_i915_private *i915) +void intel_gt_init_workarounds(struct intel_gt *gt) { - struct i915_wa_list *wal = &i915->gt_wa_list; + struct i915_wa_list *wal = >->wa_list; wa_init_start(wal, "GT", "global"); - gt_init_workarounds(i915, wal); + gt_init_workarounds(gt, wal); wa_init_finish(wal); } @@ -1353,7 +1360,7 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(gt, >->i915->gt_wa_list); + wa_list_apply(gt, >->wa_list); } static bool wa_list_verify(struct intel_gt *gt, @@ -1385,7 +1392,7 @@ static bool wa_list_verify(struct intel_gt *gt, bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(gt, >->i915->gt_wa_list, from); + return wa_list_verify(gt, >->wa_list, from); } __maybe_unused diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h index 15abb68b6c00..9beaab77c7f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -24,7 +24,7 @@ static inline void intel_wa_list_free(struct i915_wa_list *wal) void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); int intel_engine_emit_ctx_wa(struct i915_request *rq); -void intel_gt_init_workarounds(struct drm_i915_private *i915); +void intel_gt_init_workarounds(struct intel_gt *gt); void intel_gt_apply_workarounds(struct intel_gt *gt); bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index e623ac45f4aa..962e91ba3be4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -66,7 +66,7 @@ reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); - gt_init_workarounds(gt->i915, &lists->gt_wa_list); + gt_init_workarounds(gt, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); for_each_engine(engine, gt, id) { diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59fb4c710c8c..3cf61bead2f6 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -588,8 +588,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); - intel_gt_init_workarounds(dev_priv); - /* On the 945G/GM, the chipset reports the MSI capability on the * integrated graphics even though the support isn't actually there * according to the published specs. It doesn't appear to function diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74fd70385273..93d13aba6a85 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1022,8 +1022,6 @@ struct drm_i915_private { */ u8 active_pipes; - struct i915_wa_list gt_wa_list; - struct i915_frontbuffer_tracking fb_tracking; struct intel_atomic_helper { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 590efc8b0265..981e383d1a5d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1139,8 +1139,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) { intel_gt_driver_release(&dev_priv->gt); - intel_wa_list_free(&dev_priv->gt_wa_list); - intel_uc_cleanup_firmwares(&dev_priv->gt.uc); i915_gem_drain_freed_objects(dev_priv); -- cgit v1.2.3-70-g09d2 From 53718bff8f4054bd2ad4e4799ebf2ecbeed453d5 Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Thu, 16 Sep 2021 11:57:36 +0530 Subject: drm/i915/gt: Add "intel_" as prefix in set_mocs_index() Adding missing "intel_" prefix in set_mocs_index(). Fixes: b62aa57e3c78 ("drm/i915/gt: Add support of mocs propagation") Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210916062736.1733587-1-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 0c99424bf810..5753c5943ed9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -684,7 +684,7 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } - set_mocs_index(gt); + intel_set_mocs_index(gt); err = intel_engines_init(gt); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index e4b97cd14cf9..15f9ada28a7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -616,7 +616,7 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } -void set_mocs_index(struct intel_gt *gt) +void intel_set_mocs_index(struct intel_gt *gt) { struct drm_i915_mocs_table table; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 8a09d64b115f..76db827210c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,6 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); -void set_mocs_index(struct intel_gt *gt); +void intel_set_mocs_index(struct intel_gt *gt); #endif -- cgit v1.2.3-70-g09d2 From ea97e44f83e25cb63c70293d71ab625a23f8a0cc Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Thu, 16 Sep 2021 09:28:15 -0700 Subject: drm/i915: Do not define vma on stack Defining vma on stack can cause stack overflow, if vma gets populated with new fields. v2: (Daniel Vetter) - Add kerneldoc for new field Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210916162819.27848-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 18 +++++++++--------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 9 +++++++++ 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3a16d08608a5..f632dbd32b42 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -413,20 +413,20 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - struct i915_vma dummy = { - .node.start = uc_fw_ggtt_offset(uc_fw), - .node.size = obj->base.size, - .pages = obj->mm.pages, - .vm = &ggtt->vm, - }; + struct i915_vma *dummy = &uc_fw->dummy; + + dummy->node.start = uc_fw_ggtt_offset(uc_fw); + dummy->node.size = obj->base.size; + dummy->pages = obj->mm.pages; + dummy->vm = &ggtt->vm; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size); /* uc_fw->obj cache domains were not controlled across suspend */ - drm_clflush_sg(dummy.pages); + drm_clflush_sg(dummy->pages); - ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0); } static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 99bb1fe1af66..1e00bf65639e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -10,6 +10,7 @@ #include "intel_uc_fw_abi.h" #include "intel_device_info.h" #include "i915_gem.h" +#include "i915_vma.h" struct drm_printer; struct drm_i915_private; @@ -75,6 +76,14 @@ struct intel_uc_fw { bool user_overridden; size_t size; struct drm_i915_gem_object *obj; + /** + * @dummy: A vma used in binding the uc fw to ggtt. We can't define this + * vma on the stack as it can lead to a stack overflow, so we define it + * here. Safe to have 1 copy per uc fw because the binding is single + * threaded as it done during driver load (inherently single threaded) + * or during a GT reset (mutex guarantees single threaded). + */ + struct i915_vma dummy; /* * The firmware build process will generate a version header file with major and -- cgit v1.2.3-70-g09d2 From 7acbbc7cf4851b42b723098aa5699e8fca634e4a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 16 Sep 2021 09:28:16 -0700 Subject: drm/i915/guc: put all guc objects in lmem when available The firmware binary has to be loaded from lmem and the recommendation is to put all other objects in there as well. Note that we don't fall back to system memory if the allocation in lmem fails because all objects are allocated during driver load and if we have issues with lmem at that point something is seriously wrong with the system, so no point in trying to handle it. Cc: Matthew Auld Cc: Abdiel Janulgue Cc: Michal Wajdeczko Cc: Vinay Belgaumkar Cc: Radoslaw Szwichtenberg Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210916162819.27848-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 +++++++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 +++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 13 ++++-- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 ++++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 75 ++++++++++++++++++++++++++++--- 6 files changed, 128 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index eb345305dc52..034226c5d4d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -103,6 +103,32 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, size, page_size, flags); } +struct drm_i915_gem_object * +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, + const void *data, size_t size) +{ + struct drm_i915_gem_object *obj; + void *map; + + obj = i915_gem_object_create_lmem(i915, + round_up(size, PAGE_SIZE), + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return obj; + + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); + if (IS_ERR(map)) { + i915_gem_object_put(obj); + return map; + } + + memcpy(map, data, size); + + i915_gem_object_unpin_map(obj); + + return obj; +} + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 4ee81fc66302..1b88ea13435c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -23,6 +23,10 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +struct drm_i915_gem_object * +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, + const void *data, size_t size); + struct drm_i915_gem_object * __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index fbfcae727d7f..8ffb689066f6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -3,6 +3,7 @@ * Copyright © 2014-2019 Intel Corporation */ +#include "gem/i915_gem_lmem.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" @@ -647,7 +648,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) u64 flags; int ret; - obj = i915_gem_object_create_shmem(gt->i915, size); + if (HAS_LMEM(gt->i915)) + obj = i915_gem_object_create_lmem(gt->i915, size, + I915_BO_ALLOC_CPU_CLEAR | + I915_BO_ALLOC_CONTIGUOUS); + else + obj = i915_gem_object_create_shmem(gt->i915, size); + if (IS_ERR(obj)) return ERR_CAST(obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 76fe766ad1bc..196424be0998 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -41,18 +41,21 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) } /* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, - struct intel_uncore *uncore) +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) { u32 rsa[UOS_RSA_SCRATCH_COUNT]; size_t copied; int i; copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa)); - GEM_BUG_ON(copied < sizeof(rsa)); + if (copied < sizeof(rsa)) + return -ENOMEM; for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]); + + return 0; } /* @@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */ - guc_xfer_rsa(&guc->fw, uncore); + ret = guc_xfer_rsa(&guc->fw, uncore); + if (ret) + goto out; /* * Current uCode expects the code to be loaded at 8k; locations below diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fc5387b410a2..ff4b6869b80b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); + err = PTR_ERR(vaddr); + goto unpin_out; } copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); - GEM_BUG_ON(copied < huc->fw.rsa_size); - i915_gem_object_unpin_map(vma->obj); + if (copied < huc->fw.rsa_size) { + err = -ENOMEM; + goto unpin_out; + } + huc->rsa_data = vma; return 0; + +unpin_out: + i915_vma_unpin_and_release(&vma, 0); + return err; } static void intel_huc_rsa_data_destroy(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f632dbd32b42..f8cb00ffb506 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -7,6 +7,7 @@ #include #include +#include "gem/i915_gem_lmem.h" #include "intel_uc_fw.h" #include "intel_uc_fw_abi.h" #include "i915_drv.h" @@ -370,7 +371,11 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size; - obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (HAS_LMEM(i915)) + obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); + else + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fail; @@ -414,6 +419,7 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct i915_vma *dummy = &uc_fw->dummy; + u32 pte_flags = 0; dummy->node.start = uc_fw_ggtt_offset(uc_fw); dummy->node.size = obj->base.size; @@ -424,9 +430,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size); /* uc_fw->obj cache domains were not controlled across suspend */ - drm_clflush_sg(dummy->pages); + if (i915_gem_object_has_struct_page(obj)) + drm_clflush_sg(dummy->pages); + + if (i915_gem_object_is_lmem(obj)) + pte_flags |= PTE_LM; - ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0); + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); } static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) @@ -585,13 +595,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) */ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) { - struct sg_table *pages = uc_fw->obj->mm.pages; + struct intel_memory_region *mr = uc_fw->obj->mm.region; u32 size = min_t(u32, uc_fw->rsa_size, max_len); u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size; + struct sgt_iter iter; + size_t count = 0; + int idx; + /* Called during reset handling, must be atomic [no fs_reclaim] */ GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw)); - return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset); + idx = offset >> PAGE_SHIFT; + offset = offset_in_page(offset); + if (i915_gem_object_has_struct_page(uc_fw->obj)) { + struct page *page; + + for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) { + u32 len = min_t(u32, size, PAGE_SIZE - offset); + void *vaddr; + + if (idx > 0) { + idx--; + continue; + } + + vaddr = kmap_atomic(page); + memcpy(dst, vaddr + offset, len); + kunmap_atomic(vaddr); + + offset = 0; + dst += len; + size -= len; + count += len; + if (!size) + break; + } + } else { + dma_addr_t addr; + + for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) { + u32 len = min_t(u32, size, PAGE_SIZE - offset); + void __iomem *vaddr; + + if (idx > 0) { + idx--; + continue; + } + + vaddr = io_mapping_map_atomic_wc(&mr->iomap, + addr - mr->region.start); + memcpy_fromio(dst, vaddr + offset, len); + io_mapping_unmap_atomic(vaddr); + + offset = 0; + dst += len; + size -= len; + count += len; + if (!size) + break; + } + } + + return count; } /** -- cgit v1.2.3-70-g09d2 From 87ba15d6b67abb72986b10b3c163faf0ad4c6a32 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 16 Sep 2021 09:28:17 -0700 Subject: drm/i915/guc: Add DG1 GuC / HuC firmware defs Add DG1 GuC / HuC firmware defs Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210916162819.27848-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f8cb00ffb506..a685d563df72 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -51,6 +51,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ -- cgit v1.2.3-70-g09d2 From 9175ffff5ea9f2b9e956f7458d3fa38eec8f6ec8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 16 Sep 2021 09:28:18 -0700 Subject: drm/i915/guc: Enable GuC submission by default on DG1 Enable GuC submission by default on DG1 Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210916162819.27848-5-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 86c318516e14..2fef3b0bbe95 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -35,7 +35,7 @@ static void uc_expand_default_options(struct intel_uc *uc) } /* Intermediate platforms are HuC authentication only */ - if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) { + if (IS_ALDERLAKE_S(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; } -- cgit v1.2.3-70-g09d2 From 91160c8398243228dce619330fee600b4ad3a0f2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 16 Sep 2021 09:28:19 -0700 Subject: drm/i915: Take pinning into account in __i915_gem_object_is_lmem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't blow up on a GEM_WARN_ON in __i915_gem_object_is_lmem if the object is pinned (not evictable). Signed-off-by: Matthew Brost Cc: Thomas Hellström Reviewed-by: Thomas Hellström Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210916162819.27848-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 034226c5d4d0..d659239fcbcc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -56,8 +56,8 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) * @obj: The object to check. * * This function is intended to be called from within the fence signaling - * path where the fence keeps the object from being migrated. For example - * during gpu reset or similar. + * path where the fence, or a pin, keeps the object from being migrated. For + * example during gpu reset or similar. * * Return: Whether the object is resident in lmem. */ @@ -66,7 +66,8 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = READ_ONCE(obj->mm.region); #ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true)); + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) && + !i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || mr->type == INTEL_MEMORY_STOLEN_LOCAL); -- cgit v1.2.3-70-g09d2 From ff04f8beade56fead722d3f0ebcf63d4ab38e34d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 17 Sep 2021 09:12:02 -0700 Subject: drm/i915/xehp: Check new fuse bits for SFC availability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe_HP adds some new bits to the FUSE1 register to let us know whether a given SFC unit is present. We should take this into account while initializing SFC availability to our VCS and VECS engines. While we're at it, update the FUSE1 register definition to use REG_GENMASK / REG_FIELD_GET notation. Note that, the bspec confusingly names the fuse bits "disable" despite the register reflecting the *enable* status of the SFC units. The original architecture documents which the bspec is based on do properly name this field "SFC_ENABLE." Bspec: 52543 Cc: José Roberto de Souza Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210917161203.812251-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 25 ++++++++++++++++++++----- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 +++ drivers/gpu/drm/i915/gt/intel_sseu.c | 5 ++--- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- 4 files changed, 27 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 332efea696a5..06dfe7f38953 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -398,7 +398,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) { - if (GRAPHICS_VER(i915) >= 9) + if (GRAPHICS_VER(i915) >= 9 && + engine->gt->info.sfc_mask & BIT(engine->instance)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; } @@ -474,18 +475,25 @@ void intel_engines_free(struct intel_gt *gt) } static -bool gen11_vdbox_has_sfc(struct drm_i915_private *i915, +bool gen11_vdbox_has_sfc(struct intel_gt *gt, unsigned int physical_vdbox, unsigned int logical_vdbox, u16 vdbox_mask) { + struct drm_i915_private *i915 = gt->i915; + /* * In Gen11, only even numbered logical VDBOXes are hooked * up to an SFC (Scaler & Format Converter) unit. * In Gen12, Even numbered physical instance always are connected * to an SFC. Odd numbered physical instances have SFC only if * previous even instance is fused off. + * + * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field + * in the fuse register that tells us whether a specific SFC is present. */ - if (GRAPHICS_VER(i915) == 12) + if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0) + return false; + else if (GRAPHICS_VER(i915) == 12) return (physical_vdbox % 2 == 0) || !(BIT(physical_vdbox - 1) & vdbox_mask); else if (GRAPHICS_VER(i915) == 11) @@ -512,7 +520,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; unsigned int logical_vdbox = 0; unsigned int i; - u32 media_fuse; + u32 media_fuse, fuse1; u16 vdbox_mask; u16 vebox_mask; @@ -534,6 +542,13 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> GEN11_GT_VEBOX_DISABLE_SHIFT; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1); + gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); + } else { + gt->info.sfc_mask = ~0; + } + for (i = 0; i < I915_MAX_VCS; i++) { if (!HAS_ENGINE(gt, _VCS(i))) { vdbox_mask &= ~BIT(i); @@ -546,7 +561,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) continue; } - if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask)) + if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask)) gt->info.vdbox_sfc_access |= BIT(i); logical_vdbox++; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index ce127cae9e49..9f711ee0d42c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -186,6 +186,9 @@ struct intel_gt { u8 num_engines; + /* General presence of SFC units */ + u8 sfc_mask; + /* Media engine access to SFC per instance */ u8 vdbox_sfc_access; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index b0e09b58005e..bdf09051b8a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -514,10 +514,9 @@ static void hsw_sseu_info_init(struct intel_gt *gt) } fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1); - switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { + switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) { default: - MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >> - HSW_F1_EU_DIS_SHIFT); + MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)); fallthrough; case HSW_F1_EU_DIS_10EUS: sseu->eu_per_subslice = 10; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 76b437d5a37e..64d795461fc5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3108,8 +3108,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* Fuse readout registers for GT */ #define HSW_PAVP_FUSE1 _MMIO(0x911C) -#define HSW_F1_EU_DIS_SHIFT 16 -#define HSW_F1_EU_DIS_MASK (0x3 << HSW_F1_EU_DIS_SHIFT) +#define XEHP_SFC_ENABLE_MASK REG_GENMASK(27, 24) +#define HSW_F1_EU_DIS_MASK REG_GENMASK(17, 16) #define HSW_F1_EU_DIS_10EUS 0 #define HSW_F1_EU_DIS_8EUS 1 #define HSW_F1_EU_DIS_6EUS 2 -- cgit v1.2.3-70-g09d2 From 45f63790e456455be6a69630042611083effe09b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 17 Sep 2021 09:12:03 -0700 Subject: drm/i915: Check SFC fusing before recording/dumping SFC_DONE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Xe_HP and beyond the SFC unit may be fused off, even if the corresponding media engines are present. Check the SFC-specific fusing before trying to dump the SFC_DONE instances. Cc: José Roberto de Souza Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210917161203.812251-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b9f66dbd46bb..2a2d7643b551 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -753,7 +753,8 @@ static void err_print_gt(struct drm_i915_error_state_buf *m, * only exists if the corresponding VCS engine is * present. */ - if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) + if ((gt->_gt->info.sfc_mask & BIT(i)) == 0 || + !HAS_ENGINE(gt->_gt, _VCS(i * 2))) continue; err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, @@ -1632,7 +1633,8 @@ static void gt_record_regs(struct intel_gt_coredump *gt) * only exists if the corresponding VCS engine is * present. */ - if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) + if ((gt->_gt->info.sfc_mask & BIT(i)) == 0 || + !HAS_ENGINE(gt->_gt, _VCS(i * 2))) continue; gt->sfc_done[i] = -- cgit v1.2.3-70-g09d2 From 1ab2b4cd128382ff64aaab55d6204362296139bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Sep 2021 13:10:25 -0700 Subject: drm/i915/uncore: Convert gen6/gen7 read operations to fwtable On gen6-gen8 (except vlv/chv) we don't use a forcewake lookup table; we simply check whether the register offset is < 0x40000, and return FORCEWAKE_RENDER if it is. To prepare for upcoming refactoring, let's define a single-entry forcewake table from [0x0, 0x3ffff] and switch these platforms over to use the fwtable reader functions. v2: - Drop __gen6_reg_read_fw_domains which is no longer used. (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210910201030.3436066-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index f9767054dbdf..8c09af1e9f7a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -853,16 +853,6 @@ void assert_forcewakes_active(struct intel_uncore *uncore, /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) -#define __gen6_reg_read_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd; \ - if (NEEDS_FORCE_WAKE(offset)) \ - __fwd = FORCEWAKE_RENDER; \ - else \ - __fwd = 0; \ - __fwd; \ -}) - static int fw_range_cmp(u32 offset, const struct intel_forcewake_range *entry) { if (offset < entry->start) @@ -1064,6 +1054,10 @@ gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) __fwd; \ }) +static const struct intel_forcewake_range __gen6_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0x3ffff, FORCEWAKE_RENDER), +}; + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __chv_fw_ranges[] = { GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER), @@ -1623,7 +1617,6 @@ __gen_read(func, 64) __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); -__gen_reg_read_funcs(gen6); #undef __gen_reg_read_funcs #undef GEN6_READ_FOOTER @@ -2111,15 +2104,17 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) == 8) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen6_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_VALLEYVIEW(i915)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_GRAPHICS_VER(i915, 6, 7)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen6_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; -- cgit v1.2.3-70-g09d2 From 6cdbb1018238a363b9b842dc2a5733c9109319c6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Sep 2021 13:10:26 -0700 Subject: drm/i915/uncore: Associate shadow table with uncore Store a reference to a platform's shadow table inside the uncore, the same as we do with the forcewake table. This will allow us to use a single set of functions that operate on the shadow table reference rather than generating lots of nearly-identical functions via macros that differ only in terms of the table that they reference. Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210910201030.3436066-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 40 ++++++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_uncore.h | 7 +++++++ 2 files changed, 33 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8c09af1e9f7a..a733bf6f6519 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1026,17 +1026,16 @@ static int mmio_range_cmp(u32 key, const struct i915_range *range) return 0; } -#define __is_X_shadowed(x) \ -static bool is_##x##_shadowed(u32 offset) \ -{ \ - const struct i915_range *regs = x##_shadowed_regs; \ - return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ - mmio_range_cmp); \ -} +static bool is_shadowed(struct intel_uncore *uncore, u32 offset) +{ + if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table)) + return false; -__is_X_shadowed(gen8) -__is_X_shadowed(gen11) -__is_X_shadowed(gen12) + return BSEARCH(offset, + uncore->shadowed_reg_table, + uncore->shadowed_reg_table_entries, + mmio_range_cmp); +} static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1047,7 +1046,7 @@ gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) #define __gen8_reg_write_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd; \ - if (NEEDS_FORCE_WAKE(offset) && !is_gen8_shadowed(offset)) \ + if (NEEDS_FORCE_WAKE(offset) && !is_shadowed(uncore, offset)) \ __fwd = FORCEWAKE_RENDER; \ else \ __fwd = 0; \ @@ -1081,7 +1080,7 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { #define __fwtable_reg_write_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd = 0; \ - if (NEEDS_FORCE_WAKE((offset)) && !is_gen8_shadowed(offset)) \ + if (NEEDS_FORCE_WAKE((offset)) && !is_shadowed(uncore, offset)) \ __fwd = find_fw_domain(uncore, offset); \ __fwd; \ }) @@ -1090,7 +1089,7 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { ({ \ enum forcewake_domains __fwd = 0; \ const u32 __offset = (offset); \ - if (!is_gen11_shadowed(__offset)) \ + if (!is_shadowed(uncore, __offset)) \ __fwd = find_fw_domain(uncore, __offset); \ __fwd; \ }) @@ -1099,7 +1098,7 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { ({ \ enum forcewake_domains __fwd = 0; \ const u32 __offset = (offset); \ - if (!is_gen12_shadowed(__offset)) \ + if (!is_shadowed(uncore, __offset)) \ __fwd = find_fw_domain(uncore, __offset); \ __fwd; \ }) @@ -1969,6 +1968,12 @@ out: (uncore)->fw_domains_table_entries = ARRAY_SIZE((d)); \ } +#define ASSIGN_SHADOW_TABLE(uncore, d) \ +{ \ + (uncore)->shadowed_reg_table = d; \ + (uncore)->shadowed_reg_table_entries = ARRAY_SIZE((d)); \ +} + static int i915_pmic_bus_access_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -2081,30 +2086,37 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen11_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (IS_GRAPHICS_VER(i915, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_CHERRYVIEW(i915)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) == 8) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen6_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_VALLEYVIEW(i915)) { diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 531665b08039..2f31c50eeae2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -142,6 +142,13 @@ struct intel_uncore { const struct intel_forcewake_range *fw_domains_table; unsigned int fw_domains_table_entries; + /* + * Shadowed registers are special cases where we can safely write + * to the register *without* grabbing forcewake. + */ + const struct i915_range *shadowed_reg_table; + unsigned int shadowed_reg_table_entries; + struct notifier_block pmic_bus_access_nb; struct intel_uncore_funcs funcs; -- cgit v1.2.3-70-g09d2 From 09b2a597de37032c7571d1b066b5d336df8c14eb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Sep 2021 13:10:27 -0700 Subject: drm/i915/uncore: Replace gen8 write functions with general fwtable Now that we have both a standard forcewake table (albeit a single-entry table) and the shadow table stored in the uncore, we can drop the gen8-specific write handlers in favor of the general fwtable version. Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210910201030.3436066-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index a733bf6f6519..f3c77f617458 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1043,16 +1043,6 @@ gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) return FORCEWAKE_RENDER; } -#define __gen8_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd; \ - if (NEEDS_FORCE_WAKE(offset) && !is_shadowed(uncore, offset)) \ - __fwd = FORCEWAKE_RENDER; \ - else \ - __fwd = 0; \ - __fwd; \ -}) - static const struct intel_forcewake_range __gen6_fw_ranges[] = { GEN_FW_RANGE(0x0, 0x3ffff, FORCEWAKE_RENDER), }; @@ -1707,7 +1697,6 @@ __gen_write(func, 32) __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); -__gen_reg_write_funcs(gen8); #undef __gen_reg_write_funcs #undef GEN6_WRITE_FOOTER @@ -2117,7 +2106,7 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) } else if (GRAPHICS_VER(i915) == 8) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen6_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_VALLEYVIEW(i915)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); -- cgit v1.2.3-70-g09d2 From aef02736a8516a578098a9f066c67e891a075cb4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Sep 2021 13:10:28 -0700 Subject: drm/i915/uncore: Drop gen11/gen12 mmio write handlers Now that the reference to the shadow table is stored within the uncore, we don't need to generate separate fwtable, gen11_fwtable, and gen12_fwtable variants of the register write functions; a single 'fwtable' implementation will work for all of those platforms now. While consolidating the functions, gen11/gen12 pick up a NEEDS_FORCE_WAKE() check that they didn't have before, allowing them to bypass a lot of forcewake/shadow checking for non-GT registers (e.g., display). However since these later platforms also introduce media engines at higher MMIO offsets, the definition of NEEDS_FORCE_WAKE() is extended to also consider register offsets above GEN11_BSD_RING_BASE. v2: - Restore NEEDS_FORCE_WAKE(), but extend it for compatibility with the gen11+ platforms by also passing offsets above GEN11_BSD_RING_BASE. (Chris, Tvrtko) Cc: Tvrtko Ursulin Cc: Chris Wilson Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210910201030.3436066-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 60 +++++++++++++------------------------ 1 file changed, 21 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index f3c77f617458..00da0b718293 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -851,7 +851,10 @@ void assert_forcewakes_active(struct intel_uncore *uncore, } /* We give fast paths for the really cool registers */ -#define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) +#define NEEDS_FORCE_WAKE(reg) ({ \ + u32 __reg = (reg); \ + __reg < 0x40000 || __reg >= GEN11_BSD_RING_BASE; \ +}) static int fw_range_cmp(u32 offset, const struct intel_forcewake_range *entry) { @@ -1068,27 +1071,10 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { }; #define __fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - if (NEEDS_FORCE_WAKE((offset)) && !is_shadowed(uncore, offset)) \ - __fwd = find_fw_domain(uncore, offset); \ - __fwd; \ -}) - -#define __gen11_fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - const u32 __offset = (offset); \ - if (!is_shadowed(uncore, __offset)) \ - __fwd = find_fw_domain(uncore, __offset); \ - __fwd; \ -}) - -#define __gen12_fwtable_reg_write_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd = 0; \ const u32 __offset = (offset); \ - if (!is_shadowed(uncore, __offset)) \ + if (NEEDS_FORCE_WAKE((__offset)) && !is_shadowed(uncore, __offset)) \ __fwd = find_fw_domain(uncore, __offset); \ __fwd; \ }) @@ -1672,33 +1658,29 @@ __gen6_write(8) __gen6_write(16) __gen6_write(32) -#define __gen_write(func, x) \ +#define __gen_fwtable_write(x) \ static void \ -func##_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \ +fwtable_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ - fw_engine = __##func##_reg_write_fw_domains(uncore, offset); \ + fw_engine = __fwtable_reg_write_fw_domains(uncore, offset); \ if (fw_engine) \ __force_wake_auto(uncore, fw_engine); \ __raw_uncore_write##x(uncore, reg, val); \ GEN6_WRITE_FOOTER; \ } -#define __gen_reg_write_funcs(func) \ -static enum forcewake_domains \ -func##_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) { \ - return __##func##_reg_write_fw_domains(uncore, i915_mmio_reg_offset(reg)); \ -} \ -\ -__gen_write(func, 8) \ -__gen_write(func, 16) \ -__gen_write(func, 32) +static enum forcewake_domains +fwtable_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) +{ + return __fwtable_reg_write_fw_domains(uncore, i915_mmio_reg_offset(reg)); +} -__gen_reg_write_funcs(gen12_fwtable); -__gen_reg_write_funcs(gen11_fwtable); -__gen_reg_write_funcs(fwtable); +__gen_fwtable_write(8) +__gen_fwtable_write(16) +__gen_fwtable_write(32) -#undef __gen_reg_write_funcs +#undef __gen_fwtable_write #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER @@ -2076,22 +2058,22 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen11_shadowed_regs); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (IS_GRAPHICS_VER(i915, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); -- cgit v1.2.3-70-g09d2 From e5b32ae34b02c74d3327789281b88c1e59fdca30 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Sep 2021 13:10:29 -0700 Subject: drm/i915/uncore: Drop gen11 mmio read handlers Consolidate down to just a single 'fwtable' implementation. For reads we don't need to worry about shadow tables. While consolidating the functions, gen11/gen12 pick up a NEEDS_FORCE_WAKE() check that they didn't have before, allowing them to bypass a lot of forcewake/shadow checking for non-GT registers (e.g., display). v2: - Restore NEEDS_FORCE_WAKE() check. (Chris, Tvrtko) Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210910201030.3436066-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 40 ++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 00da0b718293..2547a9e02efc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -935,9 +935,6 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { __fwd; \ }) -#define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ - find_fw_domain(uncore, offset) - /* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const struct i915_range gen8_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, @@ -1567,33 +1564,30 @@ static inline void __force_wake_auto(struct intel_uncore *uncore, ___force_wake_auto(uncore, fw_domains); } -#define __gen_read(func, x) \ +#define __gen_fwtable_read(x) \ static u##x \ -func##_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \ +fwtable_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) \ +{ \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ - fw_engine = __##func##_reg_read_fw_domains(uncore, offset); \ + fw_engine = __fwtable_reg_read_fw_domains(uncore, offset); \ if (fw_engine) \ __force_wake_auto(uncore, fw_engine); \ val = __raw_uncore_read##x(uncore, reg); \ GEN6_READ_FOOTER; \ } -#define __gen_reg_read_funcs(func) \ -static enum forcewake_domains \ -func##_reg_read_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) { \ - return __##func##_reg_read_fw_domains(uncore, i915_mmio_reg_offset(reg)); \ -} \ -\ -__gen_read(func, 8) \ -__gen_read(func, 16) \ -__gen_read(func, 32) \ -__gen_read(func, 64) +static enum forcewake_domains +fwtable_reg_read_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) { + return __fwtable_reg_read_fw_domains(uncore, i915_mmio_reg_offset(reg)); +} -__gen_reg_read_funcs(gen11_fwtable); -__gen_reg_read_funcs(fwtable); +__gen_fwtable_read(8) +__gen_fwtable_read(16) +__gen_fwtable_read(32) +__gen_fwtable_read(64) -#undef __gen_reg_read_funcs +#undef __gen_fwtable_read #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER @@ -2059,22 +2053,22 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen11_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_GRAPHICS_VER(i915, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); -- cgit v1.2.3-70-g09d2 From c74e66d47e883d7fa345a74154d355a297b1abbd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Sep 2021 13:10:30 -0700 Subject: drm/i915/dg2: Add DG2-specific shadow register table We thought the DG2 table of shadowed registers would be the same as the gen12/xehp table, but it turns out that there are a few minor differences that require us to define a new DG2-specific table: * One register is removed (0xC4D4) * One register is added (0xC4E0) Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210910201030.3436066-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 41 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/selftests/intel_uncore.c | 1 + 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2547a9e02efc..c8e7c71f0896 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1016,6 +1016,45 @@ static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x1F8510, .end = 0x1F8550 }, }; +static const struct i915_range dg2_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2510, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4E0, .end = 0xC4E0 }, + { .start = 0xC600, .end = 0xC600 }, + { .start = 0xC658, .end = 0xC658 }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22510, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0510, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4510, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8510, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0510, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4510, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8510, .end = 0x1D8550 }, + { .start = 0x1E0030, .end = 0x1E0030 }, + { .start = 0x1E0510, .end = 0x1E0550 }, + { .start = 0x1E4030, .end = 0x1E4030 }, + { .start = 0x1E4510, .end = 0x1E4550 }, + { .start = 0x1E8030, .end = 0x1E8030 }, + { .start = 0x1E8510, .end = 0x1E8550 }, + { .start = 0x1F0030, .end = 0x1F0030 }, + { .start = 0x1F0510, .end = 0x1F0550 }, + { .start = 0x1F4030, .end = 0x1F4030 }, + { .start = 0x1F4510, .end = 0x1F4550 }, + { .start = 0x1F8030, .end = 0x1F8030 }, + { .start = 0x1F8510, .end = 0x1F8550 }, +}; + static int mmio_range_cmp(u32 key, const struct i915_range *range) { if (key < range->start) @@ -2051,7 +2090,7 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); - ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); + ASSIGN_SHADOW_TABLE(uncore, dg2_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 22ef2c87df1a..bc8128170a99 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,6 +68,7 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, + { dg2_shadowed_regs, ARRAY_SIZE(dg2_shadowed_regs) }, }; const struct i915_range *range; unsigned int i, j; -- cgit v1.2.3-70-g09d2 From 68c03c0e985edaf8f5ed44d56c931f9290d9311a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 15 Sep 2021 12:41:53 +0100 Subject: drm/i915/debugfs: Do not report currently active engine when describing objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not very useful to have code which tries to report a rapidly transient state which will not report anything majority of the time, especially since it is currently only used from /i915_gem_framebuffers. Signed-off-by: Tvrtko Ursulin Acked-by: Christian König Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210915114153.951670-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 17 ----------------- drivers/gpu/drm/i915/i915_debugfs.c | 5 ----- 2 files changed, 22 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..3043fcbd31bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -503,23 +503,6 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -static inline struct intel_engine_cs * -i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) -{ - struct intel_engine_cs *engine = NULL; - struct dma_fence *fence; - - rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); - rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - - return engine; -} - void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9f2cfcf7aa6a..fdbd46ff59e0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -140,7 +140,6 @@ void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct intel_engine_cs *engine; struct i915_vma *vma; int pin_count = 0; @@ -230,10 +229,6 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); if (i915_gem_object_is_framebuffer(obj)) seq_printf(m, " (fb)"); - - engine = i915_gem_object_last_write_engine(obj); - if (engine) - seq_printf(m, " (%s)", engine->name); } static int i915_gem_object_info(struct seq_file *m, void *data) -- cgit v1.2.3-70-g09d2 From 54fc4f134e096bc508a095348cb4b4965aa1c07b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 22 Sep 2021 17:30:29 -0700 Subject: drm/i915/uncore: fwtable read handlers are now used on all forcewake platforms With the recent refactor of the uncore mmio handling, all forcewake-based platforms (i.e., graphics version 6 and beyond) now use the 'fwtable' read handlers. Let's pull the assignment out of the per-platform if/else ladder to make this more obvious. Suggested-by: Tvrtko Ursulin Suggested-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210923003029.2194375-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c8e7c71f0896..678a99de07fe 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2088,49 +2088,42 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) return ret; forcewake_early_sanitize(uncore, 0); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, dg2_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen12_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen11_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_GRAPHICS_VER(i915, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_CHERRYVIEW(i915)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (GRAPHICS_VER(i915) == 8) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen6_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, gen8_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_VALLEYVIEW(i915)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } else if (IS_GRAPHICS_VER(i915, 6, 7)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen6_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; -- cgit v1.2.3-70-g09d2 From 017792a041183c4f5ec595f386e76a40ed728cfc Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Mon, 20 Sep 2021 16:05:00 +0900 Subject: drm/i915/guc, docs: Fix pdfdocs build error by removing nested grid Nested grids in grid-table cells are not specified as proper ReST constructs. Commit 572f2a5cd974 ("drm/i915/guc: Update firmware to v62.0.0") added a couple of kerneldoc tables of the form: +---+-------+------------------------------------------------------+ | 1 | 31:0 | +------------------------------------------------+ | +---+-------+ | | | |...| | | Embedded `HXG Message`_ | | +---+-------+ | | | | n | 31:0 | +------------------------------------------------+ | +---+-------+------------------------------------------------------+ For "make htmldocs", they happen to work as one might expect, but they are incompatible with "make latexdocs" and "make pdfdocs", and cause the generated gpu.tex file to become incomplete and unbuildable by xelatex. Restore the compatibility by removing those nested grids in the tables. Size comparison of generated gpu.tex: Sphinx 2.4.4 Sphinx 4.2.0 v5.14: 3238686 3841631 v5.15-rc1: 376270 432729 with this fix: 3377846 3998095 Fixes: 572f2a5cd974 ("drm/i915/guc: Update firmware to v62.0.0") Cc: John Harrison Cc: Michal Wajdeczko Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Jonathan Corbet Signed-off-by: Akira Yokosawa Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/4a227569-074f-c501-58bb-d0d8f60a8ae9@gmail.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h | 10 +++++----- drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index 99e1fad5ca20..c9086a600bce 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -102,11 +102,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); * | +-------+--------------------------------------------------------------+ * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | * +---+-------+--------------------------------------------------------------+ - * | 1 | 31:0 | +--------------------------------------------------------+ | - * +---+-------+ | | | - * |...| | | Embedded `HXG Message`_ | | - * +---+-------+ | | | - * | n | 31:0 | +--------------------------------------------------------+ | + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | * +---+-------+--------------------------------------------------------------+ */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h index bbf1ddb77434..9baa3cb07d13 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h @@ -38,11 +38,11 @@ * +---+-------+--------------------------------------------------------------+ * | | Bits | Description | * +===+=======+==============================================================+ - * | 0 | 31:0 | +--------------------------------------------------------+ | - * +---+-------+ | | | - * |...| | | Embedded `HXG Message`_ | | - * +---+-------+ | | | - * | n | 31:0 | +--------------------------------------------------------+ | + * | 0 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | * +---+-------+--------------------------------------------------------------+ */ -- cgit v1.2.3-70-g09d2 From 2dfa597d249cbe16962ac142c882b30f7ff385e5 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 10:38:05 +0200 Subject: drm/i915/gem: Fix a lockdep warning the __i915_gem_is_lmem() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Somehow we managed to invert the test for i915_gem_object_evictable(), which causes a warning in DG1 BAT, igt@debugfs_test@read_all_entries. Fix the lock check to only warn if the object *is* indeed evictable and not protected from eviction by fences. Cc: Matthew Brost Fixes: 91160c839824 ("drm/i915: Take pinning into account in __i915_gem_object_is_lmem") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922083807.888206-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index d659239fcbcc..444f8268b9c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -67,7 +67,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) #ifdef CONFIG_LOCKDEP GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) && - !i915_gem_object_evictable(obj)); + i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || mr->type == INTEL_MEMORY_STOLEN_LOCAL); -- cgit v1.2.3-70-g09d2 From 0d9388635a22331d39989b96d5830b486d9c8900 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:19 +0200 Subject: drm/i915/ttm: Implement a function to copy the contents of two TTM-based objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When backing up or restoring contents of pinned objects at suspend / resume time we need to allocate a new object as the backup. Add a function to facilitate copies between the two. Some data needs to be copied before the migration context is ready for operation, so make sure we can disable accelerated copies. v2: - Fix a missing return value check (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 69 ++++++++++++++++++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 4 ++ 2 files changed, 64 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 1e9fb126cb7b..ce2b7386e494 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -429,6 +429,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, static int i915_ttm_accel_move(struct ttm_buffer_object *bo, bool clear, struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, struct sg_table *dst_st) { struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), @@ -438,14 +439,14 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct sg_table *src_st; struct i915_request *rq; - struct ttm_tt *ttm = bo->ttm; + struct ttm_tt *src_ttm = bo->ttm; enum i915_cache_level src_level, dst_level; int ret; if (!i915->gt.migrate.context) return -EINVAL; - dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); + dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; @@ -462,10 +463,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, } intel_engine_pm_put(i915->gt.migrate.context->engine); } else { - src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) : + src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) : obj->ttm.cached_io_st; - src_level = i915_ttm_cache_level(i915, bo->resource, ttm); + src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_copy(i915->gt.migrate.context, NULL, src_st->sgl, src_level, @@ -485,11 +486,14 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, struct ttm_resource *dst_mem, - struct sg_table *dst_st) + struct ttm_tt *dst_ttm, + struct sg_table *dst_st, + bool allow_accel) { - int ret; + int ret = -EINVAL; - ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (allow_accel) + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st); if (ret) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct intel_memory_region *dst_reg, *src_reg; @@ -504,7 +508,7 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, GEM_BUG_ON(!dst_reg || !src_reg); dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) : ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, dst_st, dst_reg->region.start); @@ -559,7 +563,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) - __i915_ttm_move(bo, clear, dst_mem, dst_st); + __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true); ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_adjust_domains_after_move(obj); @@ -974,3 +978,50 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915, intel_memory_region_set_name(mr, "system-ttm"); return mr; } + +/** + * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to + * another + * @dst: The destination object + * @src: The source object + * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. + * @intr: Whether to perform waits interruptible: + * + * Note: The caller is responsible for assuring that the underlying + * TTM objects are populated if needed and locked. + * + * Return: Zero on success. Negative error code on error. If @intr == true, + * then it may return -ERESTARTSYS or -EINTR. + */ +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr) +{ + struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); + struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); + struct ttm_operation_ctx ctx = { + .interruptible = intr, + }; + struct sg_table *dst_st; + int ret; + + assert_object_held(dst); + assert_object_held(src); + + /* + * Sync for now. This will change with async moves. + */ + ret = ttm_bo_wait_ctx(dst_bo, &ctx); + if (!ret) + ret = ttm_bo_wait_ctx(src_bo, &ctx); + if (ret) + return ret; + + dst_st = gpu_binds_iomem(dst_bo->resource) ? + dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm); + + __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm, + dst_st, allow_accel); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 40927f67b6d9..34ac78d47b0d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -46,4 +46,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, resource_size_t size, resource_size_t page_size, unsigned int flags); + +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr); #endif -- cgit v1.2.3-70-g09d2 From d80ee88e0769e2e05afeb5d04b4dc43fc107b0d5 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:20 +0200 Subject: drm/i915/gem: Implement a function to process all gem objects of a region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An upcoming common pattern is to traverse the region object list and perform certain actions on all objects in a region. It's a little tricky to get the list locking right, in particular since a gem object may change region unless it's pinned or the object lock is held. Define a function that does this for us and that takes an argument that defines the action to be performed on each object. v3: - Improve structure documentation a bit (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_region.c | 70 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_region.h | 37 ++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 1f557b2178ed..a016ccec36f3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -80,3 +80,73 @@ err_object_free: i915_gem_object_free(obj); return ERR_PTR(err); } + +/** + * i915_gem_process_region - Iterate over all objects of a region using ops + * to process and optionally skip objects + * @mr: The memory region + * @apply: ops and private data + * + * This function can be used to iterate over the regions object list, + * checking whether to skip objects, and, if not, lock the objects and + * process them using the supplied ops. Note that this function temporarily + * removes objects from the region list while iterating, so that if run + * concurrently with itself may not iterate over all objects. + * + * Return: 0 if successful, negative error code on failure. + */ +int i915_gem_process_region(struct intel_memory_region *mr, + struct i915_gem_apply_to_region *apply) +{ + const struct i915_gem_apply_to_region_ops *ops = apply->ops; + struct drm_i915_gem_object *obj; + struct list_head still_in_list; + int ret = 0; + + /* + * In the future, a non-NULL apply->ww could mean the caller is + * already in a locking transaction and provides its own context. + */ + GEM_WARN_ON(apply->ww); + + INIT_LIST_HEAD(&still_in_list); + mutex_lock(&mr->objects.lock); + for (;;) { + struct i915_gem_ww_ctx ww; + + obj = list_first_entry_or_null(&mr->objects.list, typeof(*obj), + mm.region_link); + if (!obj) + break; + + list_move_tail(&obj->mm.region_link, &still_in_list); + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + /* + * Note: Someone else might be migrating the object at this + * point. The object's region is not stable until we lock + * the object. + */ + mutex_unlock(&mr->objects.lock); + apply->ww = &ww; + for_i915_gem_ww(&ww, ret, apply->interruptible) { + ret = i915_gem_object_lock(obj, apply->ww); + if (ret) + continue; + + if (obj->mm.region == mr) + ret = ops->process_obj(apply, obj); + /* Implicit object unlock */ + } + + i915_gem_object_put(obj); + mutex_lock(&mr->objects.lock); + if (ret) + break; + } + list_splice_tail(&still_in_list, &mr->objects.list); + mutex_unlock(&mr->objects.lock); + + return ret; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 1008e580a89a..fcaa12d657d4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -12,6 +12,41 @@ struct intel_memory_region; struct drm_i915_gem_object; struct sg_table; +struct i915_gem_apply_to_region; + +/** + * struct i915_gem_apply_to_region_ops - ops to use when iterating over all + * region objects. + */ +struct i915_gem_apply_to_region_ops { + /** + * process_obj - Process the current object + * @apply: Embed this for private data. + * @obj: The current object. + * + * Note that if this function is part of a ww transaction, and + * if returns -EDEADLK for one of the objects, it may be + * rerun for that same object in the same pass. + */ + int (*process_obj)(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj); +}; + +/** + * struct i915_gem_apply_to_region - Argument to the struct + * i915_gem_apply_to_region_ops functions. + * @ops: The ops for the operation. + * @ww: Locking context used for the transaction. + * @interruptible: Whether to perform object locking interruptible. + * + * This structure is intended to be embedded in a private struct if needed + */ +struct i915_gem_apply_to_region { + const struct i915_gem_apply_to_region_ops *ops; + struct i915_gem_ww_ctx *ww; + u32 interruptible:1; +}; + void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, struct intel_memory_region *mem); void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); @@ -22,4 +57,6 @@ i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t page_size, unsigned int flags); +int i915_gem_process_region(struct intel_memory_region *mr, + struct i915_gem_apply_to_region *apply); #endif -- cgit v1.2.3-70-g09d2 From 81387fc4f6e080806da7a481eca0052fc76cfbd2 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:21 +0200 Subject: drm/i915/gt: Increase suspend timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With GuC submission on DG1, the execution of the requests times out for the gem_exec_suspend igt test case after executing around 800-900 of 1000 submitted requests. Given the time we allow elsewhere for fences to signal (in the order of seconds), increase the timeout before we mark the gt wedged and proceed. Signed-off-by: Thomas Hellström Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index dea8e2479897..f84f2bfe2de0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -19,6 +19,8 @@ #include "intel_rps.h" #include "intel_wakeref.h" +#define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) + static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); @@ -279,7 +281,7 @@ static void wait_for_suspend(struct intel_gt *gt) if (!intel_gt_pm_is_awake(gt)) return; - if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (intel_gt_wait_for_idle(gt, I915_GT_SUSPEND_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave * the gpu quiet. -- cgit v1.2.3-70-g09d2 From c56ce9565374e565a4d9eb79aff60a7c19fd4b28 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:22 +0200 Subject: drm/i915 Implement LMEM backup and restore for suspend / resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just evict unpinned objects to system. For pinned LMEM objects, make a backup system object and blit the contents to that. Backup is performed in three steps, 1: Opportunistically evict evictable objects using the gpu blitter. 2: After gt idle, evict evictable objects using the gpu blitter. This will be modified in an upcoming patch to backup pinned objects that are not used by the blitter itself. 3: Backup remaining pinned objects using memcpy. Also move uC suspend to after 2) to make sure we have a functional GuC during 2) if using GuC submission. v2: - Major refactor to make sure gem_exec_suspend@hang-SX subtests work, and suspend / resume works with a slightly modified GuC submission enabling patch series. v3: - Fix a potential use-after-free (Matthew Auld) - Use i915_gem_object_create_shmem() instead of i915_gem_object_create_region (Matthew Auld) - Minor simplifications (Matthew Auld) - Fix up kerneldoc for i195_ttm_restore_region(). - Final lmem_suspend() call moved to i915_gem_backup_suspend from i915_gem_suspend_late, since the latter gets called at driver unload and we don't unnecessarily want to run it at that time. v4: - Interface change of ttm- & lmem suspend / resume functions to use flags rather than bools. (Matthew Auld) - Completely drop the i915_gem_backup_suspend change (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-5-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_pm.c | 87 ++++++++++ drivers/gpu/drm/i915/gem/i915_gem_pm.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 30 +++- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 10 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 202 +++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h | 26 +++ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 +- drivers/gpu/drm/i915/i915_drv.c | 4 +- 10 files changed, 353 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0823ea80fdc6..3b9a60adfe57 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -154,6 +154,7 @@ gem-y += \ gem/i915_gem_throttle.o \ gem/i915_gem_tiling.o \ gem/i915_gem_ttm.o \ + gem/i915_gem_ttm_pm.o \ gem/i915_gem_userptr.o \ gem/i915_gem_wait.o \ gem/i915_gemfs.o diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2471f36aaff3..734cc8e16481 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -534,6 +534,7 @@ struct drm_i915_gem_object { struct { struct sg_table *cached_io_st; struct i915_gem_object_page_iter get_io_page; + struct drm_i915_gem_object *backup; bool created:1; } ttm; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 8b9d7d14c4bd..12b37b4c1192 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -5,6 +5,7 @@ */ #include "gem/i915_gem_pm.h" +#include "gem/i915_gem_ttm_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -39,6 +40,84 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); } +static int lmem_restore(struct drm_i915_private *i915, u32 flags) +{ + struct intel_memory_region *mr; + int ret = 0, id; + + for_each_memory_region(mr, i915, id) { + if (mr->type == INTEL_MEMORY_LOCAL) { + ret = i915_ttm_restore_region(mr, flags); + if (ret) + break; + } + } + + return ret; +} + +static int lmem_suspend(struct drm_i915_private *i915, u32 flags) +{ + struct intel_memory_region *mr; + int ret = 0, id; + + for_each_memory_region(mr, i915, id) { + if (mr->type == INTEL_MEMORY_LOCAL) { + ret = i915_ttm_backup_region(mr, flags); + if (ret) + break; + } + } + + return ret; +} + +static void lmem_recover(struct drm_i915_private *i915) +{ + struct intel_memory_region *mr; + int id; + + for_each_memory_region(mr, i915, id) + if (mr->type == INTEL_MEMORY_LOCAL) + i915_ttm_recover_region(mr); +} + +int i915_gem_backup_suspend(struct drm_i915_private *i915) +{ + int ret; + + /* Opportunistically try to evict unpinned objects */ + ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU); + if (ret) + goto out_recover; + + i915_gem_suspend(i915); + + /* + * More objects may have become unpinned as requests were + * retired. Now try to evict again. The gt may be wedged here + * in which case we automatically fall back to memcpy. + */ + ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU); + if (ret) + goto out_recover; + + /* + * Remaining objects are backed up using memcpy once we've stopped + * using the migrate context. + */ + ret = lmem_suspend(i915, I915_TTM_BACKUP_PINNED); + if (ret) + goto out_recover; + + return 0; + +out_recover: + lmem_recover(i915); + + return ret; +} + void i915_gem_suspend_late(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -128,12 +207,20 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { + int ret; + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); + ret = lmem_restore(i915, 0); + GEM_WARN_ON(ret); + /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ intel_gt_resume(&i915->gt); + + ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU); + GEM_WARN_ON(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index c9a66630e92e..bedf1e95941a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -18,6 +18,7 @@ void i915_gem_idle_work_handler(struct work_struct *work); void i915_gem_suspend(struct drm_i915_private *i915); void i915_gem_suspend_late(struct drm_i915_private *i915); +int i915_gem_backup_suspend(struct drm_i915_private *i915); int i915_gem_freeze(struct drm_i915_private *i915); int i915_gem_freeze_late(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index ce2b7386e494..bbaaee5668e4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -10,18 +10,16 @@ #include "intel_memory_region.h" #include "intel_region_ttm.h" +#include "gem/i915_gem_mman.h" #include "gem/i915_gem_object.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" -#include "gem/i915_gem_mman.h" +#include "gem/i915_gem_ttm_pm.h" -#include "gt/intel_migrate.h" -#include "gt/intel_engine_pm.h" -#define I915_PL_LMEM0 TTM_PL_PRIV -#define I915_PL_SYSTEM TTM_PL_SYSTEM -#define I915_PL_STOLEN TTM_PL_VRAM -#define I915_PL_GGTT TTM_PL_TT +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_migrate.h" #define I915_TTM_PRIO_PURGE 0 #define I915_TTM_PRIO_NO_PAGES 1 @@ -64,6 +62,20 @@ static struct ttm_placement i915_sys_placement = { .busy_placement = &sys_placement_flags, }; +/** + * i915_ttm_sys_placement - Return the struct ttm_placement to be + * used for an object in system memory. + * + * Rather than making the struct extern, use this + * function. + * + * Return: A pointer to a static variable for sys placement. + */ +struct ttm_placement *i915_ttm_sys_placement(void) +{ + return &i915_sys_placement; +} + static int i915_ttm_err_to_gem(int err) { /* Fastpath */ @@ -443,7 +455,7 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, enum i915_cache_level src_level, dst_level; int ret; - if (!i915->gt.migrate.context) + if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt)) return -EINVAL; dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); @@ -887,6 +899,8 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + i915_ttm_backup_free(obj); + /* This releases all gem object bindings to the backend. */ __i915_gem_free_object(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 34ac78d47b0d..0b7291dd897c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -50,4 +50,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, struct drm_i915_gem_object *src, bool allow_accel, bool intr); + +/* Internal I915 TTM declarations and definitions below. */ + +#define I915_PL_LMEM0 TTM_PL_PRIV +#define I915_PL_SYSTEM TTM_PL_SYSTEM +#define I915_PL_STOLEN TTM_PL_VRAM +#define I915_PL_GGTT TTM_PL_TT + +struct ttm_placement *i915_ttm_sys_placement(void); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c new file mode 100644 index 000000000000..cb1c46724f70 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include + +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_pm.h" + +/** + * i915_ttm_backup_free - Free any backup attached to this object + * @obj: The object whose backup is to be freed. + */ +void i915_ttm_backup_free(struct drm_i915_gem_object *obj) +{ + if (obj->ttm.backup) { + i915_gem_object_put(obj->ttm.backup); + obj->ttm.backup = NULL; + } +} + +/** + * struct i915_gem_ttm_pm_apply - Apply-to-region subclass for restore + * @base: The i915_gem_apply_to_region we derive from. + * @allow_gpu: Whether using the gpu blitter is allowed. + * @backup_pinned: On backup, backup also pinned objects. + */ +struct i915_gem_ttm_pm_apply { + struct i915_gem_apply_to_region base; + bool allow_gpu : 1; + bool backup_pinned : 1; +}; + +static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj) +{ + struct i915_gem_ttm_pm_apply *pm_apply = + container_of(apply, typeof(*pm_apply), base); + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_buffer_object *backup_bo; + struct drm_i915_private *i915 = + container_of(bo->bdev, typeof(*i915), bdev); + struct drm_i915_gem_object *backup; + struct ttm_operation_ctx ctx = {}; + int err = 0; + + if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) + return 0; + + if (pm_apply->allow_gpu && i915_gem_object_evictable(obj)) + return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); + + if (!pm_apply->backup_pinned) + return 0; + + backup = i915_gem_object_create_shmem(i915, obj->base.size); + if (IS_ERR(backup)) + return PTR_ERR(backup); + + err = i915_gem_object_lock(backup, apply->ww); + if (err) + goto out_no_lock; + + backup_bo = i915_gem_to_ttm(backup); + err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx); + if (err) + goto out_no_populate; + + err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); + GEM_WARN_ON(err); + + obj->ttm.backup = backup; + return 0; + +out_no_populate: + i915_gem_ww_unlock_single(backup); +out_no_lock: + i915_gem_object_put(backup); + + return err; +} + +static int i915_ttm_recover(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj) +{ + i915_ttm_backup_free(obj); + return 0; +} + +/** + * i915_ttm_recover_region - Free the backup of all objects of a region + * @mr: The memory region + * + * Checks all objects of a region if there is backup attached and if so + * frees that backup. Typically this is called to recover after a partially + * performed backup. + */ +void i915_ttm_recover_region(struct intel_memory_region *mr) +{ + static const struct i915_gem_apply_to_region_ops recover_ops = { + .process_obj = i915_ttm_recover, + }; + struct i915_gem_apply_to_region apply = {.ops = &recover_ops}; + int ret; + + ret = i915_gem_process_region(mr, &apply); + GEM_WARN_ON(ret); +} + +/** + * i915_ttm_backup_region - Back up all objects of a region to smem. + * @mr: The memory region + * @allow_gpu: Whether to allow the gpu blitter for this backup. + * @backup_pinned: Backup also pinned objects. + * + * Loops over all objects of a region and either evicts them if they are + * evictable or backs them up using a backup object if they are pinned. + * + * Return: Zero on success. Negative error code on error. + */ +int i915_ttm_backup_region(struct intel_memory_region *mr, u32 flags) +{ + static const struct i915_gem_apply_to_region_ops backup_ops = { + .process_obj = i915_ttm_backup, + }; + struct i915_gem_ttm_pm_apply pm_apply = { + .base = {.ops = &backup_ops}, + .allow_gpu = flags & I915_TTM_BACKUP_ALLOW_GPU, + .backup_pinned = flags & I915_TTM_BACKUP_PINNED, + }; + + return i915_gem_process_region(mr, &pm_apply.base); +} + +static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj) +{ + struct i915_gem_ttm_pm_apply *pm_apply = + container_of(apply, typeof(*pm_apply), base); + struct drm_i915_gem_object *backup = obj->ttm.backup; + struct ttm_buffer_object *backup_bo = i915_gem_to_ttm(backup); + struct ttm_operation_ctx ctx = {}; + int err; + + if (!backup) + return 0; + + if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER)) + return 0; + + err = i915_gem_object_lock(backup, apply->ww); + if (err) + return err; + + /* Content may have been swapped. */ + err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx); + if (!err) { + err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu, + false); + GEM_WARN_ON(err); + + obj->ttm.backup = NULL; + err = 0; + } + + i915_gem_ww_unlock_single(backup); + + if (!err) + i915_gem_object_put(backup); + + return err; +} + +/** + * i915_ttm_restore_region - Restore backed-up objects of a region from smem. + * @mr: The memory region + * @allow_gpu: Whether to allow the gpu blitter to recover. + * + * Loops over all objects of a region and if they are backed-up, restores + * them from smem. + * + * Return: Zero on success. Negative error code on error. + */ +int i915_ttm_restore_region(struct intel_memory_region *mr, u32 flags) +{ + static const struct i915_gem_apply_to_region_ops restore_ops = { + .process_obj = i915_ttm_restore, + }; + struct i915_gem_ttm_pm_apply pm_apply = { + .base = {.ops = &restore_ops}, + .allow_gpu = flags & I915_TTM_BACKUP_ALLOW_GPU, + }; + + return i915_gem_process_region(mr, &pm_apply.base); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h new file mode 100644 index 000000000000..25ed67a31571 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _I915_GEM_TTM_PM_H_ +#define _I915_GEM_TTM_PM_H_ + +#include + +struct intel_memory_region; +struct drm_i915_gem_object; + +#define I915_TTM_BACKUP_ALLOW_GPU BIT(0) +#define I915_TTM_BACKUP_PINNED BIT(1) + +int i915_ttm_backup_region(struct intel_memory_region *mr, u32 flags); + +void i915_ttm_recover_region(struct intel_memory_region *mr); + +int i915_ttm_restore_region(struct intel_memory_region *mr, u32 flags); + +/* Internal I915 TTM functions below. */ +void i915_ttm_backup_free(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index f84f2bfe2de0..e9da36530b45 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -297,8 +297,6 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) { user_forcewake(gt, true); wait_for_suspend(gt); - - intel_uc_suspend(>->uc); } static suspend_state_t pm_suspend_target(void) @@ -322,6 +320,8 @@ void intel_gt_suspend_late(struct intel_gt *gt) GEM_BUG_ON(gt->awake); + intel_uc_suspend(>->uc); + /* * On disabling the device, we want to turn off HW access to memory * that we no longer own. diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3cf61bead2f6..ed7b421cad44 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1094,9 +1094,7 @@ static int i915_drm_prepare(struct drm_device *dev) * split out that work and pull it forward so that after point, * the GPU is not woken again. */ - i915_gem_suspend(i915); - - return 0; + return i915_gem_backup_suspend(i915); } static int i915_drm_suspend(struct drm_device *dev) -- cgit v1.2.3-70-g09d2 From 3e42cc61275f95fd7f022b6380b95428efe134d3 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:23 +0200 Subject: drm/i915/gt: Register the migrate contexts with their engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pinned contexts, like the migrate contexts need reset after resume since their context image may have been lost. Also the GuC needs to register pinned contexts. Add a list to struct intel_engine_cs where we add all pinned contexts on creation, and traverse that list at resume time to reset the pinned contexts. This fixes the kms_pipe_crc_basic@suspend-read-crc-pipe-a selftest for now, but proper LMEM backup / restore is needed for full suspend functionality. However, note that even with full LMEM backup / restore it may be desirable to keep the reset since backing up the migrate context images must happen using memcpy() after the migrate context has become inactive, and for performance- and other reasons we want to avoid memcpy() from LMEM. Also traverse the list at guc_init_lrc_mapping() calling guc_kernel_context_pin() for the pinned contexts, like is already done for the kernel context. v2: - Don't reset the contexts on each __engine_unpark() but rather at resume time (Chris Wilson). v3: - Reset contexts in the engine sanitize callback. (Chris Wilson) Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Maarten Lankhorst Cc: Brost Matthew Cc: Chris Wilson Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-6-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 8 ++++++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 23 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 +++++++ .../gpu/drm/i915/gt/intel_execlists_submission.c | 2 ++ drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3 +++ drivers/gpu/drm/i915/gt/mock_engine.c | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 12 ++++++++--- 9 files changed, 60 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 930569a1a01f..12252c411159 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -153,6 +153,14 @@ struct intel_context { /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; + /** + * pinned_contexts_link: List link for the engine's pinned contexts. + * This is only used if this is a perma-pinned kernel context and + * the list is assumed to only be manipulated during driver load + * or unload time so no mutex protection currently. + */ + struct list_head pinned_contexts_link; + u8 wa_bb_page; /* if set, page num reserved for context workarounds */ struct { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 06dfe7f38953..2ae57e4656a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -320,6 +320,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); + INIT_LIST_HEAD(&engine->pinned_contexts_list); engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); @@ -890,6 +891,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, return ERR_PTR(err); } + list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list); + /* * Give our perma-pinned kernel timelines a separate lockdep class, * so that we can use them from within the normal user timelines @@ -912,6 +915,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce) list_del(&ce->timeline->engine_link); mutex_unlock(&hwsp->vm->mutex); + list_del(&ce->pinned_contexts_link); intel_context_unpin(ce); intel_context_put(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 1f07ac4e0672..dacd62773735 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -298,6 +298,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_engine_init_heartbeat(engine); } +/** + * intel_engine_reset_pinned_contexts - Reset the pinned contexts of + * an engine. + * @engine: The engine whose pinned contexts we want to reset. + * + * Typically the pinned context LMEM images lose or get their content + * corrupted on suspend. This function resets their images. + */ +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + list_for_each_entry(ce, &engine->pinned_contexts_list, + pinned_contexts_link) { + /* kernel context gets reset at __engine_unpark() */ + if (ce == engine->kernel_context) + continue; + + dbg_poison_ce(ce); + ce->ops->reset(ce); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_engine_pm.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 70ea46d6cfb0..8520c595f5e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -69,4 +69,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine) void intel_engine_init__pm(struct intel_engine_cs *engine); +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine); + #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index bfbfe53c23dd..5ae1207c363b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -307,6 +307,13 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ + /** + * pinned_contexts_list: List of pinned contexts. This list is only + * assumed to be manipulated during driver load- or unload time and + * does therefore not have any additional protection. + */ + struct list_head pinned_contexts_list; + intel_engine_mask_t saturated; /* submitting semaphores too late? */ struct { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 87c595e4efe2..7147fe80919e 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2787,6 +2787,8 @@ static void execlists_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void enable_error_interrupt(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 3c65efcb7bed..593524195707 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -17,6 +17,7 @@ #include "intel_ring.h" #include "shmem_utils.h" #include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -292,6 +293,8 @@ static void xcs_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void reset_prepare(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2c1af030310c..8b89215afe46 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -376,6 +376,8 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; + INIT_LIST_HEAD(&engine->pinned_contexts_list); + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); if (!engine->sched_engine) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c7a41802b448..ba0de35f6323 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2477,6 +2477,8 @@ static void guc_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void setup_hwsp(struct intel_engine_cs *engine) @@ -2552,9 +2554,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) * and even it did this code would be run again. */ - for_each_engine(engine, gt, id) - if (engine->kernel_context) - guc_kernel_context_pin(guc, engine->kernel_context); + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + list_for_each_entry(ce, &engine->pinned_contexts_list, + pinned_contexts_link) + guc_kernel_context_pin(guc, ce); + } } static void guc_release(struct intel_engine_cs *engine) -- cgit v1.2.3-70-g09d2 From 0d8ee5ba8db46c1c833f212a85f8f6d79286722a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:24 +0200 Subject: drm/i915: Don't back up pinned LMEM context images and rings during suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pinned context images are now reset during resume. Don't back them up, and assuming that rings can be assumed empty at suspend, don't back them up either. Introduce a new object flag, I915_BO_ALLOC_PM_VOLATILE meaning that an object is allowed to lose its content on suspend. v3: - Slight documentation clarification (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-7-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 17 ++++++++++------- drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 3 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 3 ++- drivers/gpu/drm/i915/gt/intel_ring.c | 3 ++- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 734cc8e16481..118691ce81d7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -288,16 +288,19 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(struct list_head st_link); unsigned long flags; -#define I915_BO_ALLOC_CONTIGUOUS BIT(0) -#define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_CPU_CLEAR BIT(2) -#define I915_BO_ALLOC_USER BIT(3) +#define I915_BO_ALLOC_CONTIGUOUS BIT(0) +#define I915_BO_ALLOC_VOLATILE BIT(1) +#define I915_BO_ALLOC_CPU_CLEAR BIT(2) +#define I915_BO_ALLOC_USER BIT(3) +/* Object is allowed to lose its contents on suspend / resume, even if pinned */ +#define I915_BO_ALLOC_PM_VOLATILE BIT(4) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ - I915_BO_ALLOC_USER) -#define I915_BO_READONLY BIT(4) -#define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ + I915_BO_ALLOC_USER | \ + I915_BO_ALLOC_PM_VOLATILE) +#define I915_BO_READONLY BIT(5) +#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not release! */ /** * @mem_flags - Mutable placement-related flags diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index cb1c46724f70..03a00d193f40 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -60,6 +60,9 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, if (!pm_apply->backup_pinned) return 0; + if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) + return 0; + backup = i915_gem_object_create_shmem(i915, obj->base.size); if (IS_ERR(backup)) return PTR_ERR(backup); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6ba8daea2f56..3ef9eaf8c50e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -942,7 +942,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) context_size += PAGE_SIZE; } - obj = i915_gem_object_create_lmem(engine->i915, context_size, 0); + obj = i915_gem_object_create_lmem(engine->i915, context_size, + I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 7c4d5158e03b..2fdd52b62092 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -112,7 +112,8 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE); + obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE | + I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt)) obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) -- cgit v1.2.3-70-g09d2 From a259cc14eca8af7955f340c387ab843f2f7389f5 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Sep 2021 08:25:25 +0200 Subject: drm/i915: Reduce the number of objects subject to memcpy recover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We really only need memcpy restore for objects that affect the operability of the migrate context. That is, primarily the page-table objects of the migrate VM. Add an object flag, I915_BO_ALLOC_PM_EARLY for objects that need early restores using memcpy and a way to assign LMEM page-table object flags to be used by the vms. Restore objects without this flag with the gpu blitter and only objects carrying the flag using TTM memcpy. Initially mark the migrate, gt, gtt and vgpu vms to use this flag, and defer for a later audit which vms actually need it. Most importantly, user- allocated vms with pinned page-table objects can be restored using the blitter. Performance-wise memcpy restore is probably as fast as gpu restore if not faster, but using gpu restore will help tackling future restrictions in mappable LMEM size. v4: - Don't mark the aliasing ppgtt page table flags for early resume, but rather the ggtt page table flags as intended. (Matthew Auld) - The check for user buffer objects during early resume is pointless, since they are never marked I915_BO_ALLOC_PM_EARLY. (Matthew Auld) v5: - Mark GuC LMEM objects with I915_BO_ALLOC_PM_EARLY to have them restored before we fire up the migrate context. Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-8-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++++++--- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 6 +++++- drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 5 +++-- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 5 +++-- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 4 +++- drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 ++- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 3 ++- drivers/gpu/drm/i915/gt/intel_gtt.h | 9 +++++++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 2 +- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 13 ++++++++----- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 3 ++- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 7 +++++-- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++-- 19 files changed, 56 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c2ab0e22db0a..8208fd5b72c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1287,7 +1287,7 @@ i915_gem_create_context(struct drm_i915_private *i915, } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(&i915->gt); + ppgtt = i915_ppgtt_create(&i915->gt, 0); if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -1465,7 +1465,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - ppgtt = i915_ppgtt_create(&i915->gt); + ppgtt = i915_ppgtt_create(&i915->gt, 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 118691ce81d7..fa2ba9e2a4d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -294,13 +294,16 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_USER BIT(3) /* Object is allowed to lose its contents on suspend / resume, even if pinned */ #define I915_BO_ALLOC_PM_VOLATILE BIT(4) +/* Object needs to be restored early using memcpy during resume */ +#define I915_BO_ALLOC_PM_EARLY BIT(5) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ - I915_BO_ALLOC_PM_VOLATILE) -#define I915_BO_READONLY BIT(5) -#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not release! */ + I915_BO_ALLOC_PM_VOLATILE | \ + I915_BO_ALLOC_PM_EARLY) +#define I915_BO_READONLY BIT(6) +#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */ /** * @mem_flags - Mutable placement-related flags diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 12b37b4c1192..726b40e1fbb0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -97,8 +97,12 @@ int i915_gem_backup_suspend(struct drm_i915_private *i915) * More objects may have become unpinned as requests were * retired. Now try to evict again. The gt may be wedged here * in which case we automatically fall back to memcpy. + * We allow also backing up pinned objects that have not been + * marked for early recover, and that may contain, for example, + * page-tables for the migrate context. */ - ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU); + ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU | + I915_TTM_BACKUP_PINNED); if (ret) goto out_recover; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 03a00d193f40..3b6d14b5c604 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -57,7 +57,8 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, if (pm_apply->allow_gpu && i915_gem_object_evictable(obj)) return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); - if (!pm_apply->backup_pinned) + if (!pm_apply->backup_pinned || + (pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_PM_EARLY))) return 0; if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) @@ -155,7 +156,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, if (!backup) return 0; - if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER)) + if (!pm_apply->allow_gpu && !(obj->flags & I915_BO_ALLOC_PM_EARLY)) return 0; err = i915_gem_object_lock(backup, apply->ww); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 0827634c842c..77d84a9e8789 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1645,7 +1645,7 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - ppgtt = i915_ppgtt_create(&dev_priv->gt); + ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1aee5e6b1b23..890191f286e3 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -429,7 +429,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) mutex_init(&ppgtt->flush); mutex_init(&ppgtt->pin_mutex); - ppgtt_init(&ppgtt->base, gt); + ppgtt_init(&ppgtt->base, gt, 0); ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); ppgtt->base.vm.top = 1; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 6a5af995f5b1..037a9a6e4889 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -753,7 +753,8 @@ err_pd: * space. * */ -struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags) { struct i915_ppgtt *ppgtt; int err; @@ -762,7 +763,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) if (!ppgtt) return ERR_PTR(-ENOMEM); - ppgtt_init(ppgtt, gt); + ppgtt_init(ppgtt, gt, lmem_pt_obj_flags); ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h index b9028c2ad3c7..f541d19264b4 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -12,7 +12,9 @@ struct i915_address_space; struct intel_gt; enum i915_cache_level; -struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt); +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags); + u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8d71f67926f1..06576fc1310e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -644,7 +644,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) struct i915_ppgtt *ppgtt; int err; - ppgtt = i915_ppgtt_create(ggtt->vm.gt); + ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -909,6 +909,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) size = gen8_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.cleanup = gen6_gmch_remove; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 5753c5943ed9..4037c3778225 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -481,7 +481,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt) static struct i915_address_space *kernel_vm(struct intel_gt *gt) { if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) - return &i915_ppgtt_create(gt)->vm; + return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm; else return i915_vm_get(>->ggtt->vm); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index a0c2b952aa57..67d14afa6623 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -28,7 +28,8 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) * used the passed in size for the page size, which should ensure it * also has the same alignment. */ - obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0); + obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, + vm->lmem_pt_obj_flags); /* * Ensure all paging structures for this vm share the same dma-resv * object underneath, with the idea that one object_lock() will lock diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 5b539bd7645d..bc6750263359 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -260,6 +260,9 @@ struct i915_address_space { u8 pd_shift; u8 scratch_order; + /* Flags used when creating page-table objects for this vm */ + unsigned long lmem_pt_obj_flags; + struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); @@ -519,7 +522,8 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]); } -void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, + unsigned long lmem_pt_obj_flags); int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); @@ -537,7 +541,8 @@ static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) int i915_ppgtt_init_hw(struct intel_gt *gt); -struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 1dac21aa7e5c..afb1cce9a352 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -78,7 +78,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) * TODO: Add support for huge LMEM PTEs */ - vm = i915_ppgtt_create(gt); + vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY); if (IS_ERR(vm)) return ERR_CAST(vm); diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 886060f7e6fc..4396bfd630d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -155,19 +155,20 @@ int i915_ppgtt_init_hw(struct intel_gt *gt) } static struct i915_ppgtt * -__ppgtt_create(struct intel_gt *gt) +__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags) { if (GRAPHICS_VER(gt->i915) < 8) return gen6_ppgtt_create(gt); else - return gen8_ppgtt_create(gt); + return gen8_ppgtt_create(gt, lmem_pt_obj_flags); } -struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) +struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, + unsigned long lmem_pt_obj_flags) { struct i915_ppgtt *ppgtt; - ppgtt = __ppgtt_create(gt); + ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags); if (IS_ERR(ppgtt)) return ppgtt; @@ -298,7 +299,8 @@ int ppgtt_set_pages(struct i915_vma *vma) return 0; } -void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) +void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, + unsigned long lmem_pt_obj_flags) { struct drm_i915_private *i915 = gt->i915; @@ -306,6 +308,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) ppgtt->vm.i915 = i915; ppgtt->vm.dma = i915->drm.dev; ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags; dma_resv_init(&ppgtt->vm._resv); i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7e6fdabac599..7e2d99dd012d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1596,7 +1596,7 @@ static int igt_reset_evict_ppgtt(void *arg) if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) return 0; - ppgtt = i915_ppgtt_create(gt); + ppgtt = i915_ppgtt_create(gt, 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8ffb689066f6..8f8182bf7c11 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -651,7 +651,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (HAS_LMEM(gt->i915)) obj = i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CPU_CLEAR | - I915_BO_ALLOC_CONTIGUOUS); + I915_BO_ALLOC_CONTIGUOUS | + I915_BO_ALLOC_PM_EARLY); else obj = i915_gem_object_create_shmem(gt->i915, size); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index a685d563df72..3aa87be4f2e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -372,10 +372,13 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size; - if (HAS_LMEM(i915)) + if (HAS_LMEM(i915)) { obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); - else + if (!IS_ERR(obj)) + obj->flags |= I915_BO_ALLOC_PM_EARLY; + } else { obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + } if (IS_ERR(obj)) { err = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b56a8e37a3cd..0d18e13e3468 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1386,7 +1386,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) enum intel_engine_id i; int ret; - ppgtt = i915_ppgtt_create(&i915->gt); + ppgtt = i915_ppgtt_create(&i915->gt, I915_BO_ALLOC_PM_EARLY); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 2d60a5a5b065..46f4236039a9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -155,7 +155,7 @@ static int igt_ppgtt_alloc(void *arg) if (!HAS_PPGTT(dev_priv)) return 0; - ppgtt = i915_ppgtt_create(&dev_priv->gt); + ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -1053,7 +1053,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - ppgtt = i915_ppgtt_create(&dev_priv->gt); + ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_free; -- cgit v1.2.3-70-g09d2 From be988eaee1cb208c4445db46bc3ceaf75f586f0b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 21 Sep 2021 14:42:02 +0100 Subject: drm/i915/request: fix early tracepoints Currently we blow up in trace_dma_fence_init, when calling into get_driver_name or get_timeline_name, since both the engine and context might be NULL(or contain some garbage address) in the case of newly allocated slab objects via the request ctor. Note that we also use SLAB_TYPESAFE_BY_RCU here, which allows requests to be immediately freed, but delay freeing the underlying page by an RCU grace period. With this scheme requests can be re-allocated, at the same time as they are also being read by some lockless RCU lookup mechanism. In the ctor case, which is only called for new slab objects(i.e allocate new page and call the ctor for each object) it's safe to reset the context/engine prior to calling into dma_fence_init, since we can be certain that no one is doing an RCU lookup which might depend on peeking at the engine/context, like in active_engine(), since the object can't yet be externally visible. In the recycled case(which might also be externally visible) the request refcount always transitions from 0->1 after we set the context/engine etc, which should ensure it's valid to dereference the engine for example, when doing an RCU list-walk, so long as we can also increment the refcount first. If the refcount is already zero, then the request is considered complete/released. If it's non-zero, then the request might be in the process of being re-allocated, or potentially still in flight, however after successfully incrementing the refcount, it's possible to carefully inspect the request state, to determine if the request is still what we were looking for. Note that all externally visible requests returned to the cache must have zero refcount. One possible fix then is to move dma_fence_init out from the request ctor. Originally this was how it was done, but it was moved in: commit 855e39e65cfc33a73724f1cc644ffc5754864a20 Author: Chris Wilson Date: Mon Feb 3 09:41:48 2020 +0000 drm/i915: Initialise basic fence before acquiring seqno where it looks like intel_timeline_get_seqno() relied on some of the rq->fence state, but that is no longer the case since: commit 12ca695d2c1ed26b2dcbb528b42813bd0f216cfc Author: Maarten Lankhorst Date: Tue Mar 23 16:49:50 2021 +0100 drm/i915: Do not share hwsp across contexts any more, v8. intel_timeline_get_seqno() could also be cleaned up slightly by dropping the request argument. Moving dma_fence_init back out of the ctor, should ensure we have enough of the request initialised in case of trace_dma_fence_init. Functionally this should be the same, and is effectively what we were already open coding before, except now we also assign the fence->lock and fence->ops, but since these are invariant for recycled requests(which might be externally visible), and will therefore already hold the same value, it shouldn't matter. An alternative fix, since we don't yet have a fully initialised request when in the ctor, is just setting the context/engine as NULL, but this does require adding some extra handling in get_driver_name etc. v2(Daniel): - Try to make the commit message less confusing Fixes: 855e39e65cfc ("drm/i915: Initialise basic fence before acquiring seqno") Signed-off-by: Matthew Auld Cc: Michael Mason Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210921134202.3803151-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_request.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..79da5eca60af 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -829,8 +829,6 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); - dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); - rq->capture_list = NULL; init_llist_head(&rq->execute_cb); @@ -905,17 +903,12 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; - kref_init(&rq->fence.refcount); - rq->fence.flags = 0; - rq->fence.error = 0; - INIT_LIST_HEAD(&rq->fence.cb_list); - ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; - rq->fence.context = tl->fence_context; - rq->fence.seqno = seqno; + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, + tl->fence_context, seqno); RCU_INIT_POINTER(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; -- cgit v1.2.3-70-g09d2 From 6341eb6f39bb76018676dc85e01596bf32a592f6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 21 Sep 2021 15:21:16 +0100 Subject: drm/i915/selftests: exercise shmem_writeback with THP In commit: commit 1e6decf30af5c5c75445ed6ad4e65a26de578a03 Author: Hugh Dickins Date: Thu Sep 2 14:54:43 2021 -0700 shmem: shmem_writepage() split unlikely i915 THP it looks THP + shmem_writeback was an unexpected combination, and ends up hitting some BUG_ON, but it also looks like that is now fixed. While the IGTs did eventually hit this(although not during pre-merge it seems), it's likely worthwhile adding some explicit coverage for this scenario in the shrink_thp selftest. References: https://gitlab.freedesktop.org/drm/intel/-/issues/4166 Signed-off-by: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210921142116.3807946-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 77d84a9e8789..41d0680f3bd7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1589,7 +1589,8 @@ static int igt_shrink_thp(void *arg) i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + I915_SHRINK_ACTIVE | + I915_SHRINK_WRITEBACK); if (should_swap == i915_gem_object_has_pages(obj)) { pr_err("unexpected pages mismatch, should_swap=%s\n", yesno(should_swap)); -- cgit v1.2.3-70-g09d2 From a837a0686308d95ad9c48d32b4dfe86a17dc98c2 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 14 Sep 2021 14:34:12 +0530 Subject: drm/i915: Remove warning from the rps worker In commit 4e5c8a99e1cb ("drm/i915: Drop i915_request.lock requirement for intel_rps_boost()"), we decoupled the rps worker from the pm so that we could avoid the synchronization penalty which makes the assertion liable to run too early. Which makes warning invalid hence removed. Fixes: 4e5c8a99e1cb ("drm/i915: Drop i915_request.lock requirement for intel_rps_boost()") Reviewed-by: Chris Wilson Signed-off-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210914090412.1393498-1-tejaskumarx.surendrakumar.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e1a198bbd135..172de6c9f949 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -882,8 +882,6 @@ void intel_rps_park(struct intel_rps *rps) if (!intel_rps_is_enabled(rps)) return; - GEM_BUG_ON(atomic_read(&rps->num_waiters)); - if (!intel_rps_clear_active(rps)) return; -- cgit v1.2.3-70-g09d2 From 74af1e2c16749514fc8db4fc97e59ce897b73fc9 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 24 Sep 2021 18:38:25 +0200 Subject: drm/i915: Flush buffer pools on driver remove We currently do an explicit flush of the buffer pools within the call path of drm_driver.release(); this removes all buffers, regardless of their age, freeing the buffers' associated resources (objects, address space areas). However there is other code that runs within the drm_driver.release() call chain that expects objects and their associated address space areas have already been flushed. Since buffer pools auto-flush old buffers once per second in a worker thread, there's a small window where if we remove the driver while there are still objects in buffers with an age of less than one second, the assumptions of the other release code may be violated. By moving the flush to driver remove (which executes earlier via the pci_driver.remove() flow) we're ensuring that all buffers are flushed and their associated objects freed before some other code in pci_driver.remove() flushes those objects so they are released before _any_ code in drm_driver.release() that check completness of those flushes executes. v2: Reword commit description as suggested by Matt. Signed-off-by: Janusz Krzysztofik Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210924163825.634606-1-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 ++ drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 4037c3778225..5b3acf2b064e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -741,6 +741,8 @@ void intel_gt_driver_remove(struct intel_gt *gt) intel_uc_driver_remove(>->uc); intel_engines_release(gt); + + intel_gt_flush_buffer_pool(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index aa0a59c5b614..acc49c56a9f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -245,8 +245,6 @@ void intel_gt_fini_buffer_pool(struct intel_gt *gt) struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; - intel_gt_flush_buffer_pool(gt); - for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) GEM_BUG_ON(!list_empty(&pool->cache_list[n])); } -- cgit v1.2.3-70-g09d2 From d576b31bdece7b5034047cbe21170e948198d32f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 24 Sep 2021 15:46:46 +0100 Subject: drm/i915: remember to call i915_sw_fence_fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems to fix some object-debug splat which appeared while debugging something unrelated. v2: s/guc_blocked/guc_state.blocked/ Signed-off-by: Matthew Auld Cc: Ville Syrjälä Cc: Matthew Brost Tested-by: Ville Syrjälä Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20210924144646.4096402-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ff637147b1a9..e9a0cad5c34d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -419,6 +419,7 @@ void intel_context_fini(struct intel_context *ce) mutex_destroy(&ce->pin_mutex); i915_active_fini(&ce->active); + i915_sw_fence_fini(&ce->guc_state.blocked); } void i915_context_module_exit(void) -- cgit v1.2.3-70-g09d2 From 239f3c2ee18376587026efecaea5250fa5926d20 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 28 Jul 2021 14:41:31 +0200 Subject: drm/i915: Fix runtime pm handling in i915_gem_shrink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We forgot to call intel_runtime_pm_put on error, fix it! Signed-off-by: Maarten Lankhorst Fixes: cf41a8f1dc1e ("drm/i915: Finally remove obj->mm.lock.") Cc: Thomas Hellström Cc: Daniel Vetter Cc: # v5.13+ Reviewed-by: Thomas Hellström Reviewed-by: Niranjana Vishwanathapura Link: https://patchwork.freedesktop.org/patch/msgid/20210830121006.2978297-9-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index e382b7f2353b..5ab136ffdeb2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -118,7 +118,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - int err; + int err = 0; /* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */ bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915); @@ -242,12 +242,15 @@ skip: list_splice_tail(&still_in_list, phase->list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); if (err) - return err; + break; } if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); + if (err) + return err; + if (nr_scanned) *nr_scanned += scanned; return count; -- cgit v1.2.3-70-g09d2 From c4f6120302f616a3fd3cd248a102f0ae2a9ba09c Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 25 Sep 2021 20:46:12 +0800 Subject: drm/i915: Use direction definition DMA_BIDIRECTIONAL instead of PCI_DMA_BIDIRECTIONAL Replace direction definition PCI_DMA_BIDIRECTIONAL with DMA_BIDIRECTIONAL, because it helps to enhance readability and avoid possible inconsistency. Signed-off-by: Cai Huoqing Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210925124613.144-1-caihuoqing@baidu.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 ++-- drivers/gpu/drm/i915/gvt/gtt.c | 17 ++++++++--------- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 4 files changed, 14 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index a74b72f50cc9..afb35d2e5c73 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -32,7 +32,7 @@ static int init_fake_lmem_bar(struct intel_memory_region *mem) mem->remap_addr = dma_map_resource(i915->drm.dev, mem->region.start, mem->fake_mappable.size, - PCI_DMA_BIDIRECTIONAL, + DMA_BIDIRECTIONAL, DMA_ATTR_FORCE_CONTIGUOUS); if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) { drm_mm_remove_node(&mem->fake_mappable); @@ -62,7 +62,7 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem) dma_unmap_resource(mem->i915->drm.dev, mem->remap_addr, mem->fake_mappable.size, - PCI_DMA_BIDIRECTIONAL, + DMA_BIDIRECTIONAL, DMA_ATTR_FORCE_CONTIGUOUS); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index e5c2fdfc20e3..53d0cb327539 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -745,7 +745,7 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); @@ -849,7 +849,7 @@ retry: */ spt->shadow_page.type = type; daddr = dma_map_page(kdev, spt->shadow_page.page, - 0, 4096, PCI_DMA_BIDIRECTIONAL); + 0, 4096, DMA_BIDIRECTIONAL); if (dma_mapping_error(kdev, daddr)) { gvt_vgpu_err("fail to map dma addr\n"); ret = -EINVAL; @@ -865,7 +865,7 @@ retry: return spt; err_unmap_dma: - dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL); err_free_spt: free_spt(spt); return ERR_PTR(ret); @@ -2409,8 +2409,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, return -ENOMEM; } - daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, - 4096, PCI_DMA_BIDIRECTIONAL); + daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) { gvt_vgpu_err("fail to dmamap scratch_pt\n"); __free_page(virt_to_page(scratch_pt)); @@ -2461,7 +2460,7 @@ static int release_scratch_page_tree(struct intel_vgpu *vgpu) if (vgpu->gtt.scratch_pt[i].page != NULL) { daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << I915_GTT_PAGE_SHIFT); - dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); __free_page(vgpu->gtt.scratch_pt[i].page); vgpu->gtt.scratch_pt[i].page = NULL; vgpu->gtt.scratch_pt[i].page_mfn = 0; @@ -2741,7 +2740,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) } daddr = dma_map_page(dev, virt_to_page(page), 0, - 4096, PCI_DMA_BIDIRECTIONAL); + 4096, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) { gvt_err("fail to dmamap scratch ggtt page\n"); __free_page(virt_to_page(page)); @@ -2755,7 +2754,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) ret = setup_spt_oos(gvt); if (ret) { gvt_err("fail to initialize SPT oos\n"); - dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); __free_page(gvt->gtt.scratch_page); return ret; } @@ -2779,7 +2778,7 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt) dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << I915_GTT_PAGE_SHIFT); - dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); __free_page(gvt->gtt.scratch_page); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7efa386449d1..20b82fb036f8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -328,7 +328,7 @@ static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, return ret; /* Setup DMA mapping. */ - *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); + *dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, *dma_addr)) { gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", page_to_pfn(page), ret); @@ -344,7 +344,7 @@ static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, { struct device *dev = vgpu->gvt->gt->i915->drm.dev; - dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL); gvt_unpin_guest_page(vgpu, gfn, size); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 36489be4896b..cd5f2348a187 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -30,7 +30,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, do { if (dma_map_sg_attrs(obj->base.dev->dev, pages->sgl, pages->nents, - PCI_DMA_BIDIRECTIONAL, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN)) @@ -64,7 +64,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, usleep_range(100, 250); dma_unmap_sg(i915->drm.dev, pages->sgl, pages->nents, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); } /** -- cgit v1.2.3-70-g09d2 From 068396bb21c8aa3b2f797c58eb9e623d7cf271bb Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 30 Sep 2021 13:32:36 +0200 Subject: drm/i915/ttm: Rework object initialization slightly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We may end up in i915_ttm_bo_destroy() in an error path before the object is fully initialized. In that case it's not correct to call __i915_gem_free_object(), because that function a) Assumes the gem object refcount is 0, which it isn't. b) frees the placements which are owned by the caller until the init_object() region ops returns successfully. Fix this by providing a lightweight cleanup function __i915_gem_object_fini() which is also called by __i915_gem_free_object(). While doing this, also make sure we call dma_resv_fini() as part of ordinary object destruction and not from the RCU callback that frees the object. This will help track down bugs where the object is incorrectly locked from an RCU lookup. Finally, make sure the object isn't put on the region list until it's either locked or fully initialized in order to block list processing of partially initialized objects. v2: - The TTM object backend memory was freed before the gem pages were put. Separate this functionality into __i915_gem_object_pages_fini() and call it from the TTM delete_mem_notify() callback. v3: - Include i915_gem_object_free_mmaps() in __i915_gem_object_pages_fini() to make sure we don't inadvertedly introduce a race. Fixes: 48b096126954 ("drm/i915: Move __i915_gem_free_object to ttm_bo_destroy") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20210930113236.583531-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 43 +++++++++++++++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_object.h | 5 ++++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 36 +++++++++++++++---------- 3 files changed, 64 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6fb9afb65034..b88b121e244a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -89,6 +89,22 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, mutex_init(&obj->mm.get_dma_page.lock); } +/** + * i915_gem_object_fini - Clean up a GEM object initialization + * @obj: The gem object to cleanup + * + * This function cleans up gem object fields that are set up by + * drm_gem_private_object_init() and i915_gem_object_init(). + * It's primarily intended as a helper for backends that need to + * clean up the gem object in separate steps. + */ +void __i915_gem_object_fini(struct drm_i915_gem_object *obj) +{ + mutex_destroy(&obj->mm.get_page.lock); + mutex_destroy(&obj->mm.get_dma_page.lock); + dma_resv_fini(&obj->base._resv); +} + /** * Mark up the object's coherency levels for a given cache_level * @obj: #drm_i915_gem_object @@ -174,7 +190,6 @@ void __i915_gem_free_object_rcu(struct rcu_head *head) container_of(head, typeof(*obj), rcu); struct drm_i915_private *i915 = to_i915(obj->base.dev); - dma_resv_fini(&obj->base._resv); i915_gem_object_free(obj); GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); @@ -204,10 +219,17 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) } } -void __i915_gem_free_object(struct drm_i915_gem_object *obj) +/** + * __i915_gem_object_pages_fini - Clean up pages use of a gem object + * @obj: The gem object to clean up + * + * This function cleans up usage of the object mm.pages member. It + * is intended for backends that need to clean up a gem object in + * separate steps and needs to be called when the object is idle before + * the object's backing memory is freed. + */ +void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) { - trace_i915_gem_object_destroy(obj); - if (!list_empty(&obj->vma.list)) { struct i915_vma *vma; @@ -233,11 +255,17 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj) __i915_gem_object_free_mmaps(obj); - GEM_BUG_ON(!list_empty(&obj->lut_list)); - atomic_set(&obj->mm.pages_pin_count, 0); __i915_gem_object_put_pages(obj); GEM_BUG_ON(i915_gem_object_has_pages(obj)); +} + +void __i915_gem_free_object(struct drm_i915_gem_object *obj) +{ + trace_i915_gem_object_destroy(obj); + + GEM_BUG_ON(!list_empty(&obj->lut_list)); + bitmap_free(obj->bit_17); if (obj->base.import_attach) @@ -253,6 +281,8 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj) if (obj->shares_resv_from) i915_vm_resv_put(obj->shares_resv_from); + + __i915_gem_object_fini(obj); } static void __i915_gem_free_objects(struct drm_i915_private *i915, @@ -266,6 +296,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, obj->ops->delayed_free(obj); continue; } + __i915_gem_object_pages_fini(obj); __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 3043fcbd31bd..7f9f2e5ba0ec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -58,6 +58,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, struct lock_class_key *key, unsigned alloc_flags); + +void __i915_gem_object_fini(struct drm_i915_gem_object *obj); + struct drm_i915_gem_object * i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size); @@ -582,6 +585,8 @@ bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); void __i915_gem_free_object_rcu(struct rcu_head *head); +void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj); + void __i915_gem_free_object(struct drm_i915_gem_object *obj); bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index bbaaee5668e4..a81bf8b48f79 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -368,8 +368,10 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - if (likely(obj)) + if (likely(obj)) { + __i915_gem_object_pages_fini(obj); i915_ttm_free_cached_io_st(obj); + } } static struct intel_memory_region * @@ -814,12 +816,9 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) */ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { - if (obj->ttm.created) { - ttm_bo_put(i915_gem_to_ttm(obj)); - } else { - __i915_gem_free_object(obj); - call_rcu(&obj->rcu, __i915_gem_free_object_rcu); - } + GEM_BUG_ON(!obj->ttm.created); + + ttm_bo_put(i915_gem_to_ttm(obj)); } static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) @@ -899,16 +898,19 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - i915_ttm_backup_free(obj); - - /* This releases all gem object bindings to the backend. */ - __i915_gem_free_object(obj); - i915_gem_object_release_memory_region(obj); mutex_destroy(&obj->ttm.get_io_page.lock); - if (obj->ttm.created) + if (obj->ttm.created) { + i915_ttm_backup_free(obj); + + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj); + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } else { + __i915_gem_object_fini(obj); + } } /** @@ -937,7 +939,11 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); - i915_gem_object_init_memory_region(obj, mem); + + /* Don't put on a region list until we're either locked or fully initialized. */ + obj->mm.region = intel_memory_region_get(mem); + INIT_LIST_HEAD(&obj->mm.region_link); + i915_gem_object_make_unshrinkable(obj); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); @@ -964,6 +970,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, return i915_ttm_err_to_gem(ret); obj->ttm.created = true; + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); i915_gem_object_unlock(obj); -- cgit v1.2.3-70-g09d2 From 9eddd5a9a2aee15d4f0c701388cbdea70e49c6a9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 26 Sep 2021 22:10:05 +0200 Subject: drm/i915: Use fixed offset for PTEs location We assumed that for all modern GENs the PTEs and register space are split in the GTTMMADR BAR, but while it is true, we should rather use fixed offset as it is defined in the specification. Bspec: 4409, 4457, 4604, 11181, 9027, 13246, 13321, 44980 Signed-off-by: Michal Wajdeczko Cc: CQ Tang Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210926201005.1450-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 06576fc1310e..f09def2b7090 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -813,6 +813,21 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) return 0; } +static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) +{ + /* + * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset + * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset + */ + GEM_BUG_ON(GRAPHICS_VER(i915) < 6); + return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; +} + +static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) +{ + return gen6_gttmmadr_size(i915) / 2; +} + static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { struct drm_i915_private *i915 = ggtt->vm.i915; @@ -821,8 +836,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) u32 pte_flags; int ret; - /* For Modern GENs the PTEs and register space are split in the BAR */ - phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; + GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); /* * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range -- cgit v1.2.3-70-g09d2 From 217ecd310d56cca0bfd7c3ee1ff1deafae4ffed1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 26 Sep 2021 20:45:42 +0200 Subject: drm/i915/guc: Verify result from CTB (de)register action In commit b839a869dfc9 ("drm/i915/guc: Add support for data reporting in GuC responses") we missed the hypothetical case that GuC might return positive non-zero value as success data. While that would be lucky treated as error case, and at the end will result in reporting valid -EIO, in the meantime this value will be passed to ERR_PTR that could be misleading. v2: rebased Reported-by: Dan Carpenter Signed-off-by: Michal Wajdeczko Cc: Dan Carpenter Reviewed-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210926184545.1407-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 20c710a74498..c39abb010181 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -168,12 +168,15 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), }; + int ret; GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); GEM_BUG_ON(size % SZ_4K); /* CT registration must go over MMIO */ - return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + + return ret > 0 ? -EPROTO : ret; } static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, @@ -201,11 +204,14 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), }; + int ret; GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); /* CT deregistration must go over MMIO */ - return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + + return ret > 0 ? -EPROTO : ret; } static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) -- cgit v1.2.3-70-g09d2 From 0de9765da58f933c32e630ee334752d32139caaa Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 26 Sep 2021 20:45:43 +0200 Subject: drm/i915/guc: Print error name on CTB (de)registration failure Instead of plain error value (%d) print more user friendly error name (%pe). Signed-off-by: Michal Wajdeczko Reviewed-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210926184545.1407-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index c39abb010181..b6c2f2a11dc5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -191,8 +191,8 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, err = guc_action_register_ct_buffer(ct_to_guc(ct), type, desc_addr, buff_addr, size); if (unlikely(err)) - CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n", - guc_ct_buffer_type_to_str(type), err); + CT_ERROR(ct, "Failed to register %s buffer (%pe)\n", + guc_ct_buffer_type_to_str(type), ERR_PTR(err)); return err; } @@ -219,8 +219,8 @@ static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type); if (unlikely(err)) - CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n", - guc_ct_buffer_type_to_str(type), err); + CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n", + guc_ct_buffer_type_to_str(type), ERR_PTR(err)); return err; } -- cgit v1.2.3-70-g09d2 From 0e9deac51337139fc8f8a41c6c9a200944ee7aac Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 26 Sep 2021 20:45:44 +0200 Subject: drm/i915/guc: Print error name on CTB send failure Instead of plain error value (%d) print more user friendly error name (%pe). Signed-off-by: Michal Wajdeczko Reviewed-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210926184545.1407-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index b6c2f2a11dc5..e03f86d3b0b9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -781,8 +781,8 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { - CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", - action[0], ret, status); + CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n", + action[0], ERR_PTR(ret), status); } else if (unlikely(ret)) { CT_DEBUG(ct, "send action %#x returned %d (%#x)\n", action[0], ret, ret); -- cgit v1.2.3-70-g09d2 From fb2d2de3530ab6f93f88aa3b87dfb1382431ca17 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 26 Sep 2021 20:45:45 +0200 Subject: drm/i915/guc: Move and improve error message for missed CTB reply If we timeout waiting for a CT reply we print very simple error message. Improve that and by moving error reporting to the caller we can use CT_ERROR instead of DRM_ERROR and report just fence as error code will be reported later anyway. Signed-off-by: Michal Wajdeczko Reviewed-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210926184545.1407-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index e03f86d3b0b9..0a3504bc0b61 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -528,9 +528,6 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done - if (unlikely(err)) - DRM_ERROR("CT: fence %u err %d\n", req->fence, err); - *status = req->status; return err; } @@ -728,8 +725,11 @@ retry: err = wait_for_ct_request_update(&request, status); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); - if (unlikely(err)) + if (unlikely(err)) { + CT_ERROR(ct, "No response for request %#x (fence %u)\n", + action[0], request.fence); goto unlink; + } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; -- cgit v1.2.3-70-g09d2 From c2004ce99ed73788755d61b7b691ff072fdada20 Mon Sep 17 00:00:00 2001 From: Vitaly Lubart Date: Fri, 24 Sep 2021 12:14:37 -0700 Subject: mei: pxp: export pavp client to me client bus Export PAVP client to work with i915 driver, for binding it uses kernel component framework. v2:drop debug prints, refactor match code to match mei_hdcp (Tomas) Signed-off-by: Vitaly Lubart Signed-off-by: Tomas Winkler Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-3-alan.previn.teres.alexis@intel.com --- drivers/misc/mei/Kconfig | 2 + drivers/misc/mei/Makefile | 1 + drivers/misc/mei/pxp/Kconfig | 13 +++ drivers/misc/mei/pxp/Makefile | 7 ++ drivers/misc/mei/pxp/mei_pxp.c | 229 +++++++++++++++++++++++++++++++++++++++++ drivers/misc/mei/pxp/mei_pxp.h | 18 ++++ 6 files changed, 270 insertions(+) create mode 100644 drivers/misc/mei/pxp/Kconfig create mode 100644 drivers/misc/mei/pxp/Makefile create mode 100644 drivers/misc/mei/pxp/mei_pxp.c create mode 100644 drivers/misc/mei/pxp/mei_pxp.h (limited to 'drivers') diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig index f5fd5b786607..0e0bcd0da852 100644 --- a/drivers/misc/mei/Kconfig +++ b/drivers/misc/mei/Kconfig @@ -47,3 +47,5 @@ config INTEL_MEI_TXE Intel Bay Trail source "drivers/misc/mei/hdcp/Kconfig" +source "drivers/misc/mei/pxp/Kconfig" + diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile index f1c76f7ee804..d8e5165917f2 100644 --- a/drivers/misc/mei/Makefile +++ b/drivers/misc/mei/Makefile @@ -26,3 +26,4 @@ mei-$(CONFIG_EVENT_TRACING) += mei-trace.o CFLAGS_mei-trace.o = -I$(src) obj-$(CONFIG_INTEL_MEI_HDCP) += hdcp/ +obj-$(CONFIG_INTEL_MEI_PXP) += pxp/ diff --git a/drivers/misc/mei/pxp/Kconfig b/drivers/misc/mei/pxp/Kconfig new file mode 100644 index 000000000000..4029b96afc04 --- /dev/null +++ b/drivers/misc/mei/pxp/Kconfig @@ -0,0 +1,13 @@ + +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2020, Intel Corporation. All rights reserved. +# +config INTEL_MEI_PXP + tristate "Intel PXP services of ME Interface" + select INTEL_MEI_ME + depends on DRM_I915 + help + MEI Support for PXP Services on Intel platforms. + + Enables the ME FW services required for PXP support through + I915 display driver of Intel. diff --git a/drivers/misc/mei/pxp/Makefile b/drivers/misc/mei/pxp/Makefile new file mode 100644 index 000000000000..0329950d5794 --- /dev/null +++ b/drivers/misc/mei/pxp/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2020, Intel Corporation. All rights reserved. +# +# Makefile - PXP client driver for Intel MEI Bus Driver. + +obj-$(CONFIG_INTEL_MEI_PXP) += mei_pxp.o diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c new file mode 100644 index 000000000000..f7380d387bab --- /dev/null +++ b/drivers/misc/mei/pxp/mei_pxp.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2020 - 2021 Intel Corporation + */ + +/** + * DOC: MEI_PXP Client Driver + * + * The mei_pxp driver acts as a translation layer between PXP + * protocol implementer (I915) and ME FW by translating PXP + * negotiation messages to ME FW command payloads and vice versa. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_pxp.h" + +/** + * mei_pxp_send_message() - Sends a PXP message to ME FW. + * @dev: device corresponding to the mei_cl_device + * @message: a message buffer to send + * @size: size of the message + * Return: 0 on Success, <0 on Failure + */ +static int +mei_pxp_send_message(struct device *dev, const void *message, size_t size) +{ + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !message) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + /* temporary drop const qualifier till the API is fixed */ + byte = mei_cldev_send(cldev, (u8 *)message, size); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + return 0; +} + +/** + * mei_pxp_receive_message() - Receives a PXP message from ME FW. + * @dev: device corresponding to the mei_cl_device + * @buffer: a message buffer to contain the received message + * @size: size of the buffer + * Return: bytes sent on Success, <0 on Failure + */ +static int +mei_pxp_receive_message(struct device *dev, void *buffer, size_t size) +{ + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !buffer) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + byte = mei_cldev_recv(cldev, buffer, size); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + return byte; +} + +static const struct i915_pxp_component_ops mei_pxp_ops = { + .owner = THIS_MODULE, + .send = mei_pxp_send_message, + .recv = mei_pxp_receive_message, +}; + +static int mei_component_master_bind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_pxp_component *comp_master = mei_cldev_get_drvdata(cldev); + int ret; + + comp_master->ops = &mei_pxp_ops; + comp_master->tee_dev = dev; + ret = component_bind_all(dev, comp_master); + if (ret < 0) + return ret; + + return 0; +} + +static void mei_component_master_unbind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_pxp_component *comp_master = mei_cldev_get_drvdata(cldev); + + component_unbind_all(dev, comp_master); +} + +static const struct component_master_ops mei_component_master_ops = { + .bind = mei_component_master_bind, + .unbind = mei_component_master_unbind, +}; + +/** + * mei_pxp_component_match - compare function for matching mei pxp. + * + * The function checks if the driver is i915, the subcomponent is PXP + * and the grand parent of pxp and the parent of i915 are the same + * PCH device. + * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_PXP) + * @data: compare data (mei pxp device) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ +static int mei_pxp_component_match(struct device *dev, int subcomponent, + void *data) +{ + struct device *base = data; + + if (strcmp(dev->driver->name, "i915") || + subcomponent != I915_COMPONENT_PXP) + return 0; + + base = base->parent; + if (!base) + return 0; + + base = base->parent; + dev = dev->parent; + + return (base && dev && dev == base); +} + +static int mei_pxp_probe(struct mei_cl_device *cldev, + const struct mei_cl_device_id *id) +{ + struct i915_pxp_component *comp_master; + struct component_match *master_match; + int ret; + + ret = mei_cldev_enable(cldev); + if (ret < 0) { + dev_err(&cldev->dev, "mei_cldev_enable Failed. %d\n", ret); + goto enable_err_exit; + } + + comp_master = kzalloc(sizeof(*comp_master), GFP_KERNEL); + if (!comp_master) { + ret = -ENOMEM; + goto err_exit; + } + + master_match = NULL; + component_match_add_typed(&cldev->dev, &master_match, + mei_pxp_component_match, &cldev->dev); + if (IS_ERR_OR_NULL(master_match)) { + ret = -ENOMEM; + goto err_exit; + } + + mei_cldev_set_drvdata(cldev, comp_master); + ret = component_master_add_with_match(&cldev->dev, + &mei_component_master_ops, + master_match); + if (ret < 0) { + dev_err(&cldev->dev, "Master comp add failed %d\n", ret); + goto err_exit; + } + + return 0; + +err_exit: + mei_cldev_set_drvdata(cldev, NULL); + kfree(comp_master); + mei_cldev_disable(cldev); +enable_err_exit: + return ret; +} + +static void mei_pxp_remove(struct mei_cl_device *cldev) +{ + struct i915_pxp_component *comp_master = mei_cldev_get_drvdata(cldev); + int ret; + + component_master_del(&cldev->dev, &mei_component_master_ops); + kfree(comp_master); + mei_cldev_set_drvdata(cldev, NULL); + + ret = mei_cldev_disable(cldev); + if (ret) + dev_warn(&cldev->dev, "mei_cldev_disable() failed\n"); +} + +/* fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1 : PAVP GUID*/ +#define MEI_GUID_PXP GUID_INIT(0xfbf6fcf1, 0x96cf, 0x4e2e, 0xA6, \ + 0xa6, 0x1b, 0xab, 0x8c, 0xbe, 0x36, 0xb1) + +static struct mei_cl_device_id mei_pxp_tbl[] = { + { .uuid = MEI_GUID_PXP, .version = MEI_CL_VERSION_ANY }, + { } +}; +MODULE_DEVICE_TABLE(mei, mei_pxp_tbl); + +static struct mei_cl_driver mei_pxp_driver = { + .id_table = mei_pxp_tbl, + .name = KBUILD_MODNAME, + .probe = mei_pxp_probe, + .remove = mei_pxp_remove, +}; + +module_mei_cl_driver(mei_pxp_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MEI PXP"); diff --git a/drivers/misc/mei/pxp/mei_pxp.h b/drivers/misc/mei/pxp/mei_pxp.h new file mode 100644 index 000000000000..e7b15373fefd --- /dev/null +++ b/drivers/misc/mei/pxp/mei_pxp.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2020 Intel Corporation + * + * Authors: + * Vitaly Lubart + */ + +#ifndef __MEI_PXP_H__ +#define __MEI_PXP_H__ + +/* me_pxp_status: Enumeration of all PXP Status Codes */ +enum me_pxp_status { + ME_PXP_STATUS_SUCCESS = 0x0000, + +}; + +#endif /* __MEI_PXP_H__ */ -- cgit v1.2.3-70-g09d2 From e6aa71361bb9a40cb69376657afbbe32aa777d2a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:38 -0700 Subject: drm/i915/pxp: define PXP device flag and kconfig Ahead of the PXP implementation, define the relevant define flag and kconfig option. v2: flip kconfig default to N. Some machines have IFWIs that do not support PXP, so we need it to be an opt-in until we add support to query the caps from the mei device. v10: change comments from "Gen12+" to "Gen12 and newer" Signed-off-by: Alan Previn Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-4-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Kconfig | 11 +++++++++++ drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 15 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index f960f5d7664e..8859444943a0 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -131,6 +131,17 @@ config DRM_I915_GVT_KVMGT Choose this option if you want to enable KVMGT support for Intel GVT-g. +config DRM_I915_PXP + bool "Enable Intel PXP support for Intel Gen12 and newer platform" + depends on DRM_I915 + depends on INTEL_MEI && INTEL_MEI_PXP + default n + help + PXP (Protected Xe Path) is an i915 component, available on GEN12 and + newer GPUs, that helps to establish the hardware protected session and + manage the status of the alive software session, as well as its life + cycle. + menu "drm/i915 Debugging" depends on DRM_I915 depends on EXPERT diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 93d13aba6a85..357faa043b3a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1700,6 +1700,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_GLOBAL_MOCS_REGISTERS(dev_priv) (INTEL_INFO(dev_priv)->has_global_mocs) +#define HAS_PXP(dev_priv) ((IS_ENABLED(CONFIG_DRM_I915_PXP) && \ + INTEL_INFO(dev_priv)->has_pxp) && \ + VDBOX_MASK(&dev_priv->gt)) #define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d328bb95c49b..8e6f48d1eb7b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -133,6 +133,7 @@ enum intel_ppgtt_type { func(has_logical_ring_elsq); \ func(has_mslices); \ func(has_pooled_eu); \ + func(has_pxp); \ func(has_rc6); \ func(has_rc6p); \ func(has_rps); \ -- cgit v1.2.3-70-g09d2 From 3ad2dd9c4caa7330dd08244e94bec49a62fee6e4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:39 -0700 Subject: drm/i915/pxp: allocate a vcs context for pxp usage The context is required to send the session termination commands to the VCS, which will be implemented in a follow-up patch. We can also use the presence of the context as a check of pxp initialization completion. v2: use perma-pinned context (Chris) v3: rename pinned_context functions (Chris) v4: split export of pinned_context functions to a separate patch (Rodrigo) v10: remove inclusion of intel_gt_types.h from intel_pxp.h (Jani) v13: fixed for loop pointer dereference (Vinay) Signed-off-by: Alan Previn Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-5-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 4 ++ drivers/gpu/drm/i915/gt/intel_engine.h | 2 + drivers/gpu/drm/i915/gt/intel_gt.c | 5 +++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ drivers/gpu/drm/i915/pxp/intel_pxp.c | 70 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp.h | 30 +++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 17 ++++++++ 7 files changed, 131 insertions(+) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp.h create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_types.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3b9a60adfe57..d480c9d3ef8c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -278,6 +278,10 @@ i915-y += \ i915-y += i915_perf.o +# Protected execution platform (PXP) support +i915-$(CONFIG_DRM_I915_PXP) += \ + pxp/intel_pxp.o + # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 87579affb952..eed4634c08cd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -175,6 +175,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) +#define I915_GEM_HWS_PXP 0x60 +#define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 #define I915_HWS_CSB_BUF0_INDEX 0x10 diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 5b3acf2b064e..1cb1948ac959 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -21,6 +21,7 @@ #include "intel_rps.h" #include "intel_uncore.h" #include "shmem_utils.h" +#include "pxp/intel_pxp.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -714,6 +715,8 @@ int intel_gt_init(struct intel_gt *gt) intel_migrate_init(>->migrate, gt); + intel_pxp_init(>->pxp); + goto out_fw; err_gt: __intel_gt_disable(gt); @@ -751,6 +754,8 @@ void intel_gt_driver_unregister(struct intel_gt *gt) intel_rps_driver_unregister(>->rps); + intel_pxp_fini(>->pxp); + /* * Upon unregistering the device to prevent any new users, cancel * all in-flight requests so that we can quickly unbind the active diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 9f711ee0d42c..14216cc471b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -26,6 +26,7 @@ #include "intel_rps_types.h" #include "intel_migrate_types.h" #include "intel_wakeref.h" +#include "pxp/intel_pxp_types.h" struct drm_i915_private; struct i915_ggtt; @@ -201,6 +202,8 @@ struct intel_gt { struct { u8 uc_index; } mocs; + + struct intel_pxp pxp; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c new file mode 100644 index 000000000000..9258e0ddd0d5 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2020 Intel Corporation. + */ +#include "intel_pxp.h" +#include "gt/intel_context.h" +#include "i915_drv.h" + +struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) +{ + return container_of(pxp, struct intel_gt, pxp); +} + +static int create_vcs_context(struct intel_pxp *pxp) +{ + static struct lock_class_key pxp_lock; + struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_engine_cs *engine; + struct intel_context *ce; + int i; + + /* + * Find the first VCS engine present. We're guaranteed there is one + * if we're in this function due to the check in has_pxp + */ + for (i = 0, engine = NULL; !engine; i++) + engine = gt->engine_class[VIDEO_DECODE_CLASS][i]; + + GEM_BUG_ON(!engine || engine->class != VIDEO_DECODE_CLASS); + + ce = intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K, + I915_GEM_HWS_PXP_ADDR, + &pxp_lock, "pxp_context"); + if (IS_ERR(ce)) { + drm_err(>->i915->drm, "failed to create VCS ctx for PXP\n"); + return PTR_ERR(ce); + } + + pxp->ce = ce; + + return 0; +} + +static void destroy_vcs_context(struct intel_pxp *pxp) +{ + intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce)); +} + +void intel_pxp_init(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + int ret; + + if (!HAS_PXP(gt->i915)) + return; + + ret = create_vcs_context(pxp); + if (ret) + return; + + drm_info(>->i915->drm, "Protected Xe Path (PXP) protected content support initialized\n"); +} + +void intel_pxp_fini(struct intel_pxp *pxp) +{ + if (!intel_pxp_is_enabled(pxp)) + return; + + destroy_vcs_context(pxp); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h new file mode 100644 index 000000000000..73acd879f2fb --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_H__ +#define __INTEL_PXP_H__ + +#include "intel_pxp_types.h" + +static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) +{ + return pxp->ce; +} + +#ifdef CONFIG_DRM_I915_PXP +struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp); +void intel_pxp_init(struct intel_pxp *pxp); +void intel_pxp_fini(struct intel_pxp *pxp); +#else +static inline void intel_pxp_init(struct intel_pxp *pxp) +{ +} + +static inline void intel_pxp_fini(struct intel_pxp *pxp) +{ +} +#endif + +#endif /* __INTEL_PXP_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h new file mode 100644 index 000000000000..db98df723dbb --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_TYPES_H__ +#define __INTEL_PXP_TYPES_H__ + +#include + +struct intel_context; + +struct intel_pxp { + struct intel_context *ce; +}; + +#endif /* __INTEL_PXP_TYPES_H__ */ -- cgit v1.2.3-70-g09d2 From 0436ac1b008d48613af26da7053573086827613b Mon Sep 17 00:00:00 2001 From: "Huang, Sean Z" Date: Fri, 24 Sep 2021 12:14:40 -0700 Subject: drm/i915/pxp: Implement funcs to create the TEE channel Implement the funcs to create the TEE channel, so kernel can send the TEE commands directly to TEE for creating the arbitrary (default) session. v2: fix locking, don't pollute dev_priv (Chris) v3: wait for mei PXP component to be bound. v4: drop the wait, as the component might be bound after i915 load completes. We'll instead check when sending a tee message. v5: fix an issue with mei_pxp module removal v6: don't use fetch_and_zero in fini (Rodrigo) Signed-off-by: Huang, Sean Z Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-6-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/pxp/intel_pxp.c | 13 +++++ drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 79 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_tee.h | 14 ++++++ drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 4 ++ 5 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_tee.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_tee.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d480c9d3ef8c..d790b603fbdd 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -280,7 +280,8 @@ i915-y += i915_perf.o # Protected execution platform (PXP) support i915-$(CONFIG_DRM_I915_PXP) += \ - pxp/intel_pxp.o + pxp/intel_pxp.o \ + pxp/intel_pxp_tee.o # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 9258e0ddd0d5..14ab7c97800e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -3,6 +3,7 @@ * Copyright(c) 2020 Intel Corporation. */ #include "intel_pxp.h" +#include "intel_pxp_tee.h" #include "gt/intel_context.h" #include "i915_drv.h" @@ -58,7 +59,16 @@ void intel_pxp_init(struct intel_pxp *pxp) if (ret) return; + ret = intel_pxp_tee_component_init(pxp); + if (ret) + goto out_context; + drm_info(>->i915->drm, "Protected Xe Path (PXP) protected content support initialized\n"); + + return; + +out_context: + destroy_vcs_context(pxp); } void intel_pxp_fini(struct intel_pxp *pxp) @@ -66,5 +76,8 @@ void intel_pxp_fini(struct intel_pxp *pxp) if (!intel_pxp_is_enabled(pxp)) return; + intel_pxp_tee_component_fini(pxp); + destroy_vcs_context(pxp); + } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c new file mode 100644 index 000000000000..f1d8de832653 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2020 Intel Corporation. + */ + +#include +#include "drm/i915_pxp_tee_interface.h" +#include "drm/i915_component.h" +#include "i915_drv.h" +#include "intel_pxp.h" +#include "intel_pxp_tee.h" + +static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) +{ + return &kdev_to_i915(i915_kdev)->gt.pxp; +} + +/** + * i915_pxp_tee_component_bind - bind function to pass the function pointers to pxp_tee + * @i915_kdev: pointer to i915 kernel device + * @tee_kdev: pointer to tee kernel device + * @data: pointer to pxp_tee_master containing the function pointers + * + * This bind function is called during the system boot or resume from system sleep. + * + * Return: return 0 if successful. + */ +static int i915_pxp_tee_component_bind(struct device *i915_kdev, + struct device *tee_kdev, void *data) +{ + struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + + pxp->pxp_component = data; + pxp->pxp_component->tee_dev = tee_kdev; + + return 0; +} + +static void i915_pxp_tee_component_unbind(struct device *i915_kdev, + struct device *tee_kdev, void *data) +{ + struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + + pxp->pxp_component = NULL; +} + +static const struct component_ops i915_pxp_tee_component_ops = { + .bind = i915_pxp_tee_component_bind, + .unbind = i915_pxp_tee_component_unbind, +}; + +int intel_pxp_tee_component_init(struct intel_pxp *pxp) +{ + int ret; + struct intel_gt *gt = pxp_to_gt(pxp); + struct drm_i915_private *i915 = gt->i915; + + ret = component_add_typed(i915->drm.dev, &i915_pxp_tee_component_ops, + I915_COMPONENT_PXP); + if (ret < 0) { + drm_err(&i915->drm, "Failed to add PXP component (%d)\n", ret); + return ret; + } + + pxp->pxp_component_added = true; + + return 0; +} + +void intel_pxp_tee_component_fini(struct intel_pxp *pxp) +{ + struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + + if (!pxp->pxp_component_added) + return; + + component_del(i915->drm.dev, &i915_pxp_tee_component_ops); + pxp->pxp_component_added = false; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h new file mode 100644 index 000000000000..23d050a5d3e7 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_TEE_H__ +#define __INTEL_PXP_TEE_H__ + +#include "intel_pxp.h" + +int intel_pxp_tee_component_init(struct intel_pxp *pxp); +void intel_pxp_tee_component_fini(struct intel_pxp *pxp); + +#endif /* __INTEL_PXP_TEE_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index db98df723dbb..3a8e17e591bd 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -9,8 +9,12 @@ #include struct intel_context; +struct i915_pxp_component; struct intel_pxp { + struct i915_pxp_component *pxp_component; + bool pxp_component_added; + struct intel_context *ce; }; -- cgit v1.2.3-70-g09d2 From e0111ce0f5cbd2428333c3d3cd33ba42f1e148ad Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:41 -0700 Subject: drm/i915/pxp: set KCR reg init The setting is required by hardware to allow us doing further protection operation such as sending commands to GPU or TEE. The register needs to be re-programmed on resume, so for simplicitly we bundle the programming with the component binding, which is automatically called on resume. Further HW set-up operations will be added in the same location in follow-up patches, so get ready for them by using a couple of init/fini_hw wrappers instead of calling the KCR funcs directly. v3: move programming to component binding function, rework commit msg Signed-off-by: Huang, Sean Z Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-7-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp.c | 26 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp.h | 3 +++ drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 5 +++++ 3 files changed, 34 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 14ab7c97800e..f7166dd71d8f 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -12,6 +12,23 @@ struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) return container_of(pxp, struct intel_gt, pxp); } +/* KCR register definitions */ +#define KCR_INIT _MMIO(0x320f0) +/* Setting KCR Init bit is required after system boot */ +#define KCR_INIT_ALLOW_DISPLAY_ME_WRITES REG_BIT(14) + +static void kcr_pxp_enable(struct intel_gt *gt) +{ + intel_uncore_write(gt->uncore, KCR_INIT, + _MASKED_BIT_ENABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES)); +} + +static void kcr_pxp_disable(struct intel_gt *gt) +{ + intel_uncore_write(gt->uncore, KCR_INIT, + _MASKED_BIT_DISABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES)); +} + static int create_vcs_context(struct intel_pxp *pxp) { static struct lock_class_key pxp_lock; @@ -79,5 +96,14 @@ void intel_pxp_fini(struct intel_pxp *pxp) intel_pxp_tee_component_fini(pxp); destroy_vcs_context(pxp); +} + +void intel_pxp_init_hw(struct intel_pxp *pxp) +{ + kcr_pxp_enable(pxp_to_gt(pxp)); +} +void intel_pxp_fini_hw(struct intel_pxp *pxp) +{ + kcr_pxp_disable(pxp_to_gt(pxp)); } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 73acd879f2fb..cd6560b2605c 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -17,6 +17,9 @@ static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp); void intel_pxp_init(struct intel_pxp *pxp); void intel_pxp_fini(struct intel_pxp *pxp); + +void intel_pxp_init_hw(struct intel_pxp *pxp); +void intel_pxp_fini_hw(struct intel_pxp *pxp); #else static inline void intel_pxp_init(struct intel_pxp *pxp) { diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index f1d8de832653..0c0c7946e6a0 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -33,6 +33,9 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, pxp->pxp_component = data; pxp->pxp_component->tee_dev = tee_kdev; + /* the component is required to fully start the PXP HW */ + intel_pxp_init_hw(pxp); + return 0; } @@ -41,6 +44,8 @@ static void i915_pxp_tee_component_unbind(struct device *i915_kdev, { struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + intel_pxp_fini_hw(pxp); + pxp->pxp_component = NULL; } -- cgit v1.2.3-70-g09d2 From cbbd3764b2399ad882cda98435b25144e9ea2124 Mon Sep 17 00:00:00 2001 From: "Huang, Sean Z" Date: Fri, 24 Sep 2021 12:14:42 -0700 Subject: drm/i915/pxp: Create the arbitrary session after boot Create the arbitrary session, with the fixed session id 0xf, after system boot, for the case that application allocates the protected buffer without establishing any protection session. Because the hardware requires at least one alive session for protected buffer creation. This arbitrary session will need to be re-created after teardown or power event because hardware encryption key won't be valid after such cases. The session ID is exposed as part of the uapi so it can be used as part of userspace commands. v2: use gt->uncore->rpm (Chris) v3: s/arb_is_in_play/arb_is_valid (Chris), move set-up to the new init_hw function v4: move interface defs to separate header, set arb_is valid to false on fini (Rodrigo) v5: handle async component binding Signed-off-by: Huang, Sean Z Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-8-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/pxp/intel_pxp.c | 12 +++ drivers/gpu/drm/i915/pxp/intel_pxp.h | 2 + drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 74 ++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_session.h | 15 ++++ drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 87 ++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_tee.h | 3 + drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h | 36 +++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 10 +++ include/uapi/drm/i915_drm.h | 3 + 10 files changed, 243 insertions(+) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_session.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_session.h create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d790b603fbdd..1152348c04a9 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -281,6 +281,7 @@ i915-y += i915_perf.o # Protected execution platform (PXP) support i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp.o \ + pxp/intel_pxp_session.o \ pxp/intel_pxp_tee.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index f7166dd71d8f..65598487a7b2 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -3,6 +3,7 @@ * Copyright(c) 2020 Intel Corporation. */ #include "intel_pxp.h" +#include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "gt/intel_context.h" #include "i915_drv.h" @@ -12,6 +13,11 @@ struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) return container_of(pxp, struct intel_gt, pxp); } +bool intel_pxp_is_active(const struct intel_pxp *pxp) +{ + return pxp->arb_is_valid; +} + /* KCR register definitions */ #define KCR_INIT _MMIO(0x320f0) /* Setting KCR Init bit is required after system boot */ @@ -72,6 +78,8 @@ void intel_pxp_init(struct intel_pxp *pxp) if (!HAS_PXP(gt->i915)) return; + mutex_init(&pxp->tee_mutex); + ret = create_vcs_context(pxp); if (ret) return; @@ -93,6 +101,8 @@ void intel_pxp_fini(struct intel_pxp *pxp) if (!intel_pxp_is_enabled(pxp)) return; + pxp->arb_is_valid = false; + intel_pxp_tee_component_fini(pxp); destroy_vcs_context(pxp); @@ -101,6 +111,8 @@ void intel_pxp_fini(struct intel_pxp *pxp) void intel_pxp_init_hw(struct intel_pxp *pxp) { kcr_pxp_enable(pxp_to_gt(pxp)); + + intel_pxp_create_arb_session(pxp); } void intel_pxp_fini_hw(struct intel_pxp *pxp) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index cd6560b2605c..2960e8338c82 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -15,6 +15,8 @@ static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) #ifdef CONFIG_DRM_I915_PXP struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp); +bool intel_pxp_is_active(const struct intel_pxp *pxp); + void intel_pxp_init(struct intel_pxp *pxp); void intel_pxp_fini(struct intel_pxp *pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c new file mode 100644 index 000000000000..3331868f354c --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#include "drm/i915_drm.h" +#include "i915_drv.h" + +#include "intel_pxp.h" +#include "intel_pxp_session.h" +#include "intel_pxp_tee.h" +#include "intel_pxp_types.h" + +#define ARB_SESSION I915_PROTECTED_CONTENT_DEFAULT_SESSION /* shorter define */ + +#define GEN12_KCR_SIP _MMIO(0x32260) /* KCR hwdrm session in play 0-31 */ + +static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + intel_wakeref_t wakeref; + u32 sip = 0; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + sip = intel_uncore_read(gt->uncore, GEN12_KCR_SIP); + + return sip & BIT(id); +} + +static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_play) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + intel_wakeref_t wakeref; + u32 mask = BIT(id); + int ret; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + ret = intel_wait_for_register(gt->uncore, + GEN12_KCR_SIP, + mask, + in_play ? mask : 0, + 100); + + return ret; +} + +int intel_pxp_create_arb_session(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + int ret; + + pxp->arb_is_valid = false; + + if (intel_pxp_session_is_in_play(pxp, ARB_SESSION)) { + drm_err(>->i915->drm, "arb session already in play at creation time\n"); + return -EEXIST; + } + + ret = intel_pxp_tee_cmd_create_arb_session(pxp, ARB_SESSION); + if (ret) { + drm_err(>->i915->drm, "tee cmd for arb session creation failed\n"); + return ret; + } + + ret = pxp_wait_for_session_state(pxp, ARB_SESSION, true); + if (ret) { + drm_err(>->i915->drm, "arb session failed to go in play\n"); + return ret; + } + + pxp->arb_is_valid = true; + + return 0; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h new file mode 100644 index 000000000000..316c3bebed9c --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_SESSION_H__ +#define __INTEL_PXP_SESSION_H__ + +#include + +struct intel_pxp; + +int intel_pxp_create_arb_session(struct intel_pxp *pxp); + +#endif /* __INTEL_PXP_SESSION_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index 0c0c7946e6a0..accbbf9aa6d1 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -8,13 +8,63 @@ #include "drm/i915_component.h" #include "i915_drv.h" #include "intel_pxp.h" +#include "intel_pxp_session.h" #include "intel_pxp_tee.h" +#include "intel_pxp_tee_interface.h" static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) { return &kdev_to_i915(i915_kdev)->gt.pxp; } +static int intel_pxp_tee_io_message(struct intel_pxp *pxp, + void *msg_in, u32 msg_in_size, + void *msg_out, u32 msg_out_max_size, + u32 *msg_out_rcv_size) +{ + struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct i915_pxp_component *pxp_component = pxp->pxp_component; + int ret = 0; + + mutex_lock(&pxp->tee_mutex); + + /* + * The binding of the component is asynchronous from i915 probe, so we + * can't be sure it has happened. + */ + if (!pxp_component) { + ret = -ENODEV; + goto unlock; + } + + ret = pxp_component->ops->send(pxp_component->tee_dev, msg_in, msg_in_size); + if (ret) { + drm_err(&i915->drm, "Failed to send PXP TEE message\n"); + goto unlock; + } + + ret = pxp_component->ops->recv(pxp_component->tee_dev, msg_out, msg_out_max_size); + if (ret < 0) { + drm_err(&i915->drm, "Failed to receive PXP TEE message\n"); + goto unlock; + } + + if (ret > msg_out_max_size) { + drm_err(&i915->drm, + "Failed to receive PXP TEE message due to unexpected output size\n"); + ret = -ENOSPC; + goto unlock; + } + + if (msg_out_rcv_size) + *msg_out_rcv_size = ret; + + ret = 0; +unlock: + mutex_unlock(&pxp->tee_mutex); + return ret; +} + /** * i915_pxp_tee_component_bind - bind function to pass the function pointers to pxp_tee * @i915_kdev: pointer to i915 kernel device @@ -28,14 +78,24 @@ static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) static int i915_pxp_tee_component_bind(struct device *i915_kdev, struct device *tee_kdev, void *data) { + struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + mutex_lock(&pxp->tee_mutex); pxp->pxp_component = data; pxp->pxp_component->tee_dev = tee_kdev; + mutex_unlock(&pxp->tee_mutex); /* the component is required to fully start the PXP HW */ intel_pxp_init_hw(pxp); + if (!pxp->arb_is_valid) { + drm_err(&i915->drm, "Failed to create arb session during bind\n"); + intel_pxp_fini_hw(pxp); + pxp->pxp_component = NULL; + return -EIO; + } + return 0; } @@ -46,7 +106,9 @@ static void i915_pxp_tee_component_unbind(struct device *i915_kdev, intel_pxp_fini_hw(pxp); + mutex_lock(&pxp->tee_mutex); pxp->pxp_component = NULL; + mutex_unlock(&pxp->tee_mutex); } static const struct component_ops i915_pxp_tee_component_ops = { @@ -82,3 +144,28 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp) component_del(i915->drm.dev, &i915_pxp_tee_component_ops); pxp->pxp_component_added = false; } + +int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, + int arb_session_id) +{ + struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct pxp_tee_create_arb_in msg_in = {0}; + struct pxp_tee_create_arb_out msg_out = {0}; + int ret; + + msg_in.header.api_version = PXP_TEE_APIVER; + msg_in.header.command_id = PXP_TEE_ARB_CMDID; + msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header); + msg_in.protection_mode = PXP_TEE_ARB_PROTECTION_MODE; + msg_in.session_id = arb_session_id; + + ret = intel_pxp_tee_io_message(pxp, + &msg_in, sizeof(msg_in), + &msg_out, sizeof(msg_out), + NULL); + + if (ret) + drm_err(&i915->drm, "Failed to send tee msg ret=[%d]\n", ret); + + return ret; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h index 23d050a5d3e7..c136053ce340 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h @@ -11,4 +11,7 @@ int intel_pxp_tee_component_init(struct intel_pxp *pxp); void intel_pxp_tee_component_fini(struct intel_pxp *pxp); +int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, + int arb_session_id); + #endif /* __INTEL_PXP_TEE_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h new file mode 100644 index 000000000000..36e9b0868f5c --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_TEE_INTERFACE_H__ +#define __INTEL_PXP_TEE_INTERFACE_H__ + +#include + +#define PXP_TEE_APIVER 0x40002 +#define PXP_TEE_ARB_CMDID 0x1e +#define PXP_TEE_ARB_PROTECTION_MODE 0x2 + +/* PXP TEE message header */ +struct pxp_tee_cmd_header { + u32 api_version; + u32 command_id; + u32 status; + /* Length of the message (excluding the header) */ + u32 buffer_len; +} __packed; + +/* PXP TEE message input to create a arbitrary session */ +struct pxp_tee_create_arb_in { + struct pxp_tee_cmd_header header; + u32 protection_mode; + u32 session_id; +} __packed; + +/* PXP TEE message output to create a arbitrary session */ +struct pxp_tee_create_arb_out { + struct pxp_tee_cmd_header header; +} __packed; + +#endif /* __INTEL_PXP_TEE_INTERFACE_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 3a8e17e591bd..d393e46a7d98 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -6,6 +6,7 @@ #ifndef __INTEL_PXP_TYPES_H__ #define __INTEL_PXP_TYPES_H__ +#include #include struct intel_context; @@ -16,6 +17,15 @@ struct intel_pxp { bool pxp_component_added; struct intel_context *ce; + + /* + * After a teardown, the arb session can still be in play on the HW + * even if the keys are gone, so we can't rely on the HW state of the + * session to know if it's valid and need to track the status in SW. + */ + bool arb_is_valid; + + struct mutex tee_mutex; /* protects the tee channel binding */ }; #endif /* __INTEL_PXP_TYPES_H__ */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index bde5860b3686..920e9e852e5a 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3038,6 +3038,9 @@ struct drm_i915_gem_create_ext_memory_regions { __u64 regions; }; +/* ID of the protected content session managed by i915 when PXP is active */ +#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf + #if defined(__cplusplus) } #endif -- cgit v1.2.3-70-g09d2 From 95c9e1224da36711a1fc49e243c949a93108e72d Mon Sep 17 00:00:00 2001 From: "Huang, Sean Z" Date: Fri, 24 Sep 2021 12:14:43 -0700 Subject: drm/i915/pxp: Implement arb session teardown Teardown is triggered when the display topology changes and no long meets the secure playback requirement, and hardware trashes all the encryption keys for display. Additionally, we want to emit a teardown operation to make sure we're clean on boot and resume v2: emit in the ring, use high prio request (Chris) v3: better defines, stalling flush, cleaned up and renamed submission funcs (Chris) v12: fix uninitialized variable bug Signed-off-by: Huang, Sean Z Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-9-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 22 ++++- drivers/gpu/drm/i915/pxp/intel_pxp.c | 7 +- drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c | 141 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h | 15 +++ drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 29 ++++++ drivers/gpu/drm/i915/pxp/intel_pxp_session.h | 1 + 7 files changed, 212 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1152348c04a9..e318153d1efa 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -281,6 +281,7 @@ i915-y += i915_perf.o # Protected execution platform (PXP) support i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp.o \ + pxp/intel_pxp_cmd.o \ pxp/intel_pxp_session.o \ pxp/intel_pxp_tee.o diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 1c3af0fc0456..f8253012d166 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -28,10 +28,13 @@ #define INSTR_26_TO_24_MASK 0x7000000 #define INSTR_26_TO_24_SHIFT 24 +#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) + /* * Memory interface instructions used by the kernel */ -#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) +#define MI_INSTR(opcode, flags) \ + (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags)) /* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ #define MI_GLOBAL_GTT (1<<22) @@ -57,6 +60,7 @@ #define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) #define MI_SUSPEND_FLUSH_EN (1<<0) #define MI_SET_APPID MI_INSTR(0x0e, 0) +#define MI_SET_APPID_SESSION_ID(x) ((x) << 0) #define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) #define MI_OVERLAY_CONTINUE (0x0<<21) #define MI_OVERLAY_ON (0x1<<21) @@ -146,6 +150,7 @@ #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ +#define MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22) #define MI_FLUSH_DW_STORE_INDEX (1<<21) #define MI_INVALIDATE_TLB (1<<18) #define MI_FLUSH_DW_OP_STOREDW (1<<14) @@ -272,6 +277,19 @@ #define MI_MATH_REG_ZF 0x32 #define MI_MATH_REG_CF 0x33 +/* + * Media instructions used by the kernel + */ +#define MEDIA_INSTR(pipe, op, sub_op, flags) \ + (__INSTR(INSTR_RC_CLIENT) | (pipe) << INSTR_SUBCLIENT_SHIFT | \ + (op) << INSTR_26_TO_24_SHIFT | (sub_op) << 16 | (flags)) + +#define MFX_WAIT MEDIA_INSTR(1, 0, 0, 0) +#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8) +#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9) + +#define CRYPTO_KEY_EXCHANGE MEDIA_INSTR(2, 6, 9, 0) + /* * Commands used only by the command parser */ @@ -328,8 +346,6 @@ #define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) -#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) - #define COLOR_BLT ((0x2<<29)|(0x40<<22)) #define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 65598487a7b2..6afa87f90eef 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -110,9 +110,14 @@ void intel_pxp_fini(struct intel_pxp *pxp) void intel_pxp_init_hw(struct intel_pxp *pxp) { + int ret; + kcr_pxp_enable(pxp_to_gt(pxp)); - intel_pxp_create_arb_session(pxp); + /* always emit a full termination to clean the state */ + ret = intel_pxp_terminate_arb_session_and_global(pxp); + if (!ret) + intel_pxp_create_arb_session(pxp); } void intel_pxp_fini_hw(struct intel_pxp *pxp) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c new file mode 100644 index 000000000000..f41e45763d0d --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#include "intel_pxp.h" +#include "intel_pxp_cmd.h" +#include "intel_pxp_session.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_ring.h" + +#include "i915_trace.h" + +/* stall until prior PXP and MFX/HCP/HUC objects are cmopleted */ +#define MFX_WAIT_PXP (MFX_WAIT | \ + MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \ + MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG) + +static u32 *pxp_emit_session_selection(u32 *cs, u32 idx) +{ + *cs++ = MFX_WAIT_PXP; + + /* pxp off */ + *cs++ = MI_FLUSH_DW; + *cs++ = 0; + *cs++ = 0; + + /* select session */ + *cs++ = MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx); + + *cs++ = MFX_WAIT_PXP; + + /* pxp on */ + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_PROTECTED_MEM_EN | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_PXP_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + + *cs++ = MFX_WAIT_PXP; + + return cs; +} + +static u32 *pxp_emit_inline_termination(u32 *cs) +{ + /* session inline termination */ + *cs++ = CRYPTO_KEY_EXCHANGE; + *cs++ = 0; + + return cs; +} + +static u32 *pxp_emit_session_termination(u32 *cs, u32 idx) +{ + cs = pxp_emit_session_selection(cs, idx); + cs = pxp_emit_inline_termination(cs); + + return cs; +} + +static u32 *pxp_emit_wait(u32 *cs) +{ + /* wait for cmds to go through */ + *cs++ = MFX_WAIT_PXP; + *cs++ = 0; + + return cs; +} + +/* + * if we ever need to terminate more than one session, we can submit multiple + * selections and terminations back-to-back with a single wait at the end + */ +#define SELECTION_LEN 10 +#define TERMINATION_LEN 2 +#define SESSION_TERMINATION_LEN(x) ((SELECTION_LEN + TERMINATION_LEN) * (x)) +#define WAIT_LEN 2 + +static void pxp_request_commit(struct i915_request *rq) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; + struct intel_timeline * const tl = i915_request_timeline(rq); + + lockdep_unpin_lock(&tl->mutex, rq->cookie); + + trace_i915_request_add(rq); + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + + mutex_unlock(&tl->mutex); +} + +int intel_pxp_terminate_session(struct intel_pxp *pxp, u32 id) +{ + struct i915_request *rq; + struct intel_context *ce = pxp->ce; + u32 *cs; + int err = 0; + + if (!intel_pxp_is_enabled(pxp)) + return 0; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + if (ce->engine->emit_init_breadcrumb) { + err = ce->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + cs = intel_ring_begin(rq, SESSION_TERMINATION_LEN(1) + WAIT_LEN); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_rq; + } + + cs = pxp_emit_session_termination(cs, id); + cs = pxp_emit_wait(cs); + + intel_ring_advance(rq, cs); + +out_rq: + i915_request_get(rq); + + if (unlikely(err)) + i915_request_set_error_once(rq, err); + + pxp_request_commit(rq); + + if (!err && i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + + i915_request_put(rq); + + return err; +} + diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h new file mode 100644 index 000000000000..6d6299543578 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_CMD_H__ +#define __INTEL_PXP_CMD_H__ + +#include + +struct intel_pxp; + +int intel_pxp_terminate_session(struct intel_pxp *pxp, u32 idx); + +#endif /* __INTEL_PXP_CMD_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 3331868f354c..b8e24adeb1f3 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_pxp.h" +#include "intel_pxp_cmd.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "intel_pxp_types.h" @@ -15,6 +16,9 @@ #define GEN12_KCR_SIP _MMIO(0x32260) /* KCR hwdrm session in play 0-31 */ +/* PXP global terminate register for session termination */ +#define PXP_GLOBAL_TERMINATE _MMIO(0x320f8) + static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) { struct intel_gt *gt = pxp_to_gt(pxp); @@ -72,3 +76,28 @@ int intel_pxp_create_arb_session(struct intel_pxp *pxp) return 0; } + +int intel_pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) +{ + int ret; + struct intel_gt *gt = pxp_to_gt(pxp); + + pxp->arb_is_valid = false; + + /* terminate the hw sessions */ + ret = intel_pxp_terminate_session(pxp, ARB_SESSION); + if (ret) { + drm_err(>->i915->drm, "Failed to submit session termination\n"); + return ret; + } + + ret = pxp_wait_for_session_state(pxp, ARB_SESSION, false); + if (ret) { + drm_err(>->i915->drm, "Session state did not clear\n"); + return ret; + } + + intel_uncore_write(gt->uncore, PXP_GLOBAL_TERMINATE, 1); + + return ret; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h index 316c3bebed9c..7354314b1cc4 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h @@ -11,5 +11,6 @@ struct intel_pxp; int intel_pxp_create_arb_session(struct intel_pxp *pxp); +int intel_pxp_terminate_arb_session_and_global(struct intel_pxp *pxp); #endif /* __INTEL_PXP_SESSION_H__ */ -- cgit v1.2.3-70-g09d2 From 2ae096872a2c61da476fb072348ef8f07ea54c9b Mon Sep 17 00:00:00 2001 From: "Huang, Sean Z" Date: Fri, 24 Sep 2021 12:14:44 -0700 Subject: drm/i915/pxp: Implement PXP irq handler The HW will generate a teardown interrupt when session termination is required, which requires i915 to submit a terminating batch. Once the HW is done with the termination it will generate another interrupt, at which point it is safe to re-create the session. Since the termination and re-creation flow is something we want to trigger from the driver as well, use a common work function that can be called both from the irq handler and from the driver set-up flows, which has the addded benefit of allowing us to skip any extra locks because the work itself serializes the operations. v2: use struct completion instead of bool (Chris) v3: drop locks, clean up functions and improve comments (Chris), move to common work function. v4: improve comments, simplify wait logic (Rodrigo) v5: unconditionally set interrupts, rename state_attacked var (Rodrigo) v10: remove inclusion of intel_gt_types.h from intel_pxp.h (Jani) Signed-off-by: Alan Previn Signed-off-by: Huang, Sean Z Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-10-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_gt_irq.c | 7 ++ drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/pxp/intel_pxp.c | 66 ++++++++++++++++-- drivers/gpu/drm/i915/pxp/intel_pxp.h | 8 +++ drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 100 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_irq.h | 32 +++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 54 ++++++++++++++- drivers/gpu/drm/i915/pxp/intel_pxp_session.h | 5 +- drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 8 +-- drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 18 +++++ 11 files changed, 284 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_irq.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e318153d1efa..5363d3b3fda7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -282,6 +282,7 @@ i915-y += i915_perf.o i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp.o \ pxp/intel_pxp_cmd.o \ + pxp/intel_pxp_irq.o \ pxp/intel_pxp_session.o \ pxp/intel_pxp_tee.o diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index b2de83be4d97..699a74582d32 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -13,6 +13,7 @@ #include "intel_lrc_reg.h" #include "intel_uncore.h" #include "intel_rps.h" +#include "pxp/intel_pxp_irq.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -64,6 +65,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, if (instance == OTHER_GTPM_INSTANCE) return gen11_rps_irq_handler(>->rps, iir); + if (instance == OTHER_KCR_INSTANCE) + return intel_pxp_irq_handler(>->pxp, iir); + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); } @@ -196,6 +200,9 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); + + intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE, 0); + intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_MASK, ~0); } void gen11_gt_irq_postinstall(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 64d795461fc5..e2647c02a4d2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8124,6 +8124,7 @@ enum { /* irq instances for OTHER_CLASS */ #define OTHER_GUC_INSTANCE 0 #define OTHER_GTPM_INSTANCE 1 +#define OTHER_KCR_INSTANCE 4 #define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 6afa87f90eef..2e188217c3b7 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -2,7 +2,9 @@ /* * Copyright(c) 2020 Intel Corporation. */ +#include #include "intel_pxp.h" +#include "intel_pxp_irq.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "gt/intel_context.h" @@ -80,6 +82,16 @@ void intel_pxp_init(struct intel_pxp *pxp) mutex_init(&pxp->tee_mutex); + /* + * we'll use the completion to check if there is a termination pending, + * so we start it as completed and we reinit it when a termination + * is triggered. + */ + init_completion(&pxp->termination); + complete_all(&pxp->termination); + + INIT_WORK(&pxp->session_work, intel_pxp_session_work); + ret = create_vcs_context(pxp); if (ret) return; @@ -108,19 +120,61 @@ void intel_pxp_fini(struct intel_pxp *pxp) destroy_vcs_context(pxp); } -void intel_pxp_init_hw(struct intel_pxp *pxp) +void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp) { - int ret; + pxp->arb_is_valid = false; + reinit_completion(&pxp->termination); +} + +static void intel_pxp_queue_termination(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + + /* + * We want to get the same effect as if we received a termination + * interrupt, so just pretend that we did. + */ + spin_lock_irq(>->irq_lock); + intel_pxp_mark_termination_in_progress(pxp); + pxp->session_events |= PXP_TERMINATION_REQUEST; + queue_work(system_unbound_wq, &pxp->session_work); + spin_unlock_irq(>->irq_lock); +} +/* + * the arb session is restarted from the irq work when we receive the + * termination completion interrupt + */ +int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp) +{ + if (!intel_pxp_is_enabled(pxp)) + return 0; + + if (!wait_for_completion_timeout(&pxp->termination, + msecs_to_jiffies(100))) + return -ETIMEDOUT; + + if (!pxp->arb_is_valid) + return -EIO; + + return 0; +} + +void intel_pxp_init_hw(struct intel_pxp *pxp) +{ kcr_pxp_enable(pxp_to_gt(pxp)); + intel_pxp_irq_enable(pxp); - /* always emit a full termination to clean the state */ - ret = intel_pxp_terminate_arb_session_and_global(pxp); - if (!ret) - intel_pxp_create_arb_session(pxp); + /* + * the session could've been attacked while we weren't loaded, so + * handle it as if it was and re-create it. + */ + intel_pxp_queue_termination(pxp); } void intel_pxp_fini_hw(struct intel_pxp *pxp) { kcr_pxp_disable(pxp_to_gt(pxp)); + + intel_pxp_irq_disable(pxp); } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 2960e8338c82..54b268280379 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -22,6 +22,9 @@ void intel_pxp_fini(struct intel_pxp *pxp); void intel_pxp_init_hw(struct intel_pxp *pxp); void intel_pxp_fini_hw(struct intel_pxp *pxp); + +void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); +int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp); #else static inline void intel_pxp_init(struct intel_pxp *pxp) { @@ -30,6 +33,11 @@ static inline void intel_pxp_init(struct intel_pxp *pxp) static inline void intel_pxp_fini(struct intel_pxp *pxp) { } + +static inline int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp) +{ + return -ENODEV; +} #endif #endif /* __INTEL_PXP_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c new file mode 100644 index 000000000000..300cf2b96fda --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2020 Intel Corporation. + */ +#include +#include "intel_pxp.h" +#include "intel_pxp_irq.h" +#include "intel_pxp_session.h" +#include "gt/intel_gt_irq.h" +#include "gt/intel_gt_types.h" +#include "i915_irq.h" +#include "i915_reg.h" + +/** + * intel_pxp_irq_handler - Handles PXP interrupts. + * @pxp: pointer to pxp struct + * @iir: interrupt vector + */ +void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + + if (GEM_WARN_ON(!intel_pxp_is_enabled(pxp))) + return; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!iir)) + return; + + if (iir & (GEN12_DISPLAY_PXP_STATE_TERMINATED_INTERRUPT | + GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) { + /* immediately mark PXP as inactive on termination */ + intel_pxp_mark_termination_in_progress(pxp); + pxp->session_events |= PXP_TERMINATION_REQUEST; + } + + if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT) + pxp->session_events |= PXP_TERMINATION_COMPLETE; + + if (pxp->session_events) + queue_work(system_unbound_wq, &pxp->session_work); +} + +static inline void __pxp_set_interrupts(struct intel_gt *gt, u32 interrupts) +{ + struct intel_uncore *uncore = gt->uncore; + const u32 mask = interrupts << 16; + + intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE, mask); + intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_MASK, ~mask); +} + +static inline void pxp_irq_reset(struct intel_gt *gt) +{ + spin_lock_irq(>->irq_lock); + gen11_gt_reset_one_iir(gt, 0, GEN11_KCR); + spin_unlock_irq(>->irq_lock); +} + +void intel_pxp_irq_enable(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + + spin_lock_irq(>->irq_lock); + + if (!pxp->irq_enabled) + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_KCR)); + + __pxp_set_interrupts(gt, GEN12_PXP_INTERRUPTS); + pxp->irq_enabled = true; + + spin_unlock_irq(>->irq_lock); +} + +void intel_pxp_irq_disable(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + + /* + * We always need to submit a global termination when we re-enable the + * interrupts, so there is no need to make sure that the session state + * makes sense at the end of this function. Just make sure this is not + * called in a path were the driver consider the session as valid and + * doesn't call a termination on restart. + */ + GEM_WARN_ON(intel_pxp_is_active(pxp)); + + spin_lock_irq(>->irq_lock); + + pxp->irq_enabled = false; + __pxp_set_interrupts(gt, 0); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + pxp_irq_reset(gt); + + flush_work(&pxp->session_work); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h new file mode 100644 index 000000000000..8b5793654844 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_IRQ_H__ +#define __INTEL_PXP_IRQ_H__ + +#include + +struct intel_pxp; + +#define GEN12_DISPLAY_PXP_STATE_TERMINATED_INTERRUPT BIT(1) +#define GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT BIT(2) +#define GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT BIT(3) + +#define GEN12_PXP_INTERRUPTS \ + (GEN12_DISPLAY_PXP_STATE_TERMINATED_INTERRUPT | \ + GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT | \ + GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT) + +#ifdef CONFIG_DRM_I915_PXP +void intel_pxp_irq_enable(struct intel_pxp *pxp); +void intel_pxp_irq_disable(struct intel_pxp *pxp); +void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir); +#else +static inline void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) +{ +} +#endif + +#endif /* __INTEL_PXP_IRQ_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index b8e24adeb1f3..67c30e534d50 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -48,7 +48,7 @@ static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_pla return ret; } -int intel_pxp_create_arb_session(struct intel_pxp *pxp) +static int pxp_create_arb_session(struct intel_pxp *pxp) { struct intel_gt *gt = pxp_to_gt(pxp); int ret; @@ -77,12 +77,13 @@ int intel_pxp_create_arb_session(struct intel_pxp *pxp) return 0; } -int intel_pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) +static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) { int ret; struct intel_gt *gt = pxp_to_gt(pxp); - pxp->arb_is_valid = false; + /* must mark termination in progress calling this function */ + GEM_WARN_ON(pxp->arb_is_valid); /* terminate the hw sessions */ ret = intel_pxp_terminate_session(pxp, ARB_SESSION); @@ -101,3 +102,50 @@ int intel_pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) return ret; } + +static void pxp_terminate(struct intel_pxp *pxp) +{ + int ret; + + pxp->hw_state_invalidated = true; + + /* + * if we fail to submit the termination there is no point in waiting for + * it to complete. PXP will be marked as non-active until the next + * termination is issued. + */ + ret = pxp_terminate_arb_session_and_global(pxp); + if (ret) + complete_all(&pxp->termination); +} + +static void pxp_terminate_complete(struct intel_pxp *pxp) +{ + /* Re-create the arb session after teardown handle complete */ + if (fetch_and_zero(&pxp->hw_state_invalidated)) + pxp_create_arb_session(pxp); + + complete_all(&pxp->termination); +} + +void intel_pxp_session_work(struct work_struct *work) +{ + struct intel_pxp *pxp = container_of(work, typeof(*pxp), session_work); + struct intel_gt *gt = pxp_to_gt(pxp); + u32 events = 0; + + spin_lock_irq(>->irq_lock); + events = fetch_and_zero(&pxp->session_events); + spin_unlock_irq(>->irq_lock); + + if (!events) + return; + + if (events & PXP_TERMINATION_REQUEST) { + events &= ~PXP_TERMINATION_COMPLETE; + pxp_terminate(pxp); + } + + if (events & PXP_TERMINATION_COMPLETE) + pxp_terminate_complete(pxp); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h index 7354314b1cc4..ba4c9d2b94b7 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h @@ -8,9 +8,8 @@ #include -struct intel_pxp; +struct work_struct; -int intel_pxp_create_arb_session(struct intel_pxp *pxp); -int intel_pxp_terminate_arb_session_and_global(struct intel_pxp *pxp); +void intel_pxp_session_work(struct work_struct *work); #endif /* __INTEL_PXP_SESSION_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index accbbf9aa6d1..494bd6b48514 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -80,6 +80,7 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + int ret; mutex_lock(&pxp->tee_mutex); pxp->pxp_component = data; @@ -88,15 +89,14 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, /* the component is required to fully start the PXP HW */ intel_pxp_init_hw(pxp); - - if (!pxp->arb_is_valid) { + ret = intel_pxp_wait_for_arb_start(pxp); + if (ret) { drm_err(&i915->drm, "Failed to create arb session during bind\n"); intel_pxp_fini_hw(pxp); pxp->pxp_component = NULL; - return -EIO; } - return 0; + return ret; } static void i915_pxp_tee_component_unbind(struct device *i915_kdev, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index d393e46a7d98..5a170e43c959 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -6,8 +6,10 @@ #ifndef __INTEL_PXP_TYPES_H__ #define __INTEL_PXP_TYPES_H__ +#include #include #include +#include struct intel_context; struct i915_pxp_component; @@ -26,6 +28,22 @@ struct intel_pxp { bool arb_is_valid; struct mutex tee_mutex; /* protects the tee channel binding */ + + /* + * If the HW perceives an attack on the integrity of the encryption it + * will invalidate the keys and expect SW to re-initialize the session. + * We keep track of this state to make sure we only re-start the arb + * session when required. + */ + bool hw_state_invalidated; + + bool irq_enabled; + struct completion termination; + + struct work_struct session_work; + u32 session_events; /* protected with gt->irq_lock */ +#define PXP_TERMINATION_REQUEST BIT(0) +#define PXP_TERMINATION_COMPLETE BIT(1) }; #endif /* __INTEL_PXP_TYPES_H__ */ -- cgit v1.2.3-70-g09d2 From d3ac8d42168a9be7380be8035df8b6d3780ec2a1 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:45 -0700 Subject: drm/i915/pxp: interfaces for using protected objects This api allow user mode to create protected buffers and to mark contexts as making use of such objects. Only when using contexts marked in such a way is the execution guaranteed to work as expected. Contexts can only be marked as using protected content at creation time (i.e. the parameter is immutable) and they must be both bannable and not recoverable. Given that the protected session gets invalidated on suspend, contexts created this way hold a runtime pm wakeref until they're either destroyed or invalidated. All protected objects and contexts will be considered invalid when the PXP session is destroyed and all new submissions using them will be rejected. All intel contexts within the invalidated gem contexts will be marked banned. Userspace can detect that an invalidation has occurred via the RESET_STATS ioctl, where we report it the same way as a ban due to a hang. v5: squash patches, rebase on proto_ctx, update kerneldoc v6: rebase on obj create_ext changes v7: Use session counter to check if an object it valid, hold wakeref in context, don't add a new flag to RESET_STATS (Daniel) v8: don't increase guilty count for contexts banned during pxp invalidation (Rodrigo) v9: better comments, avoid wakeref put race between pxp_inval and context_close, add usage examples (Rodrigo) v10: modify internal set/get-protected-context functions to not return -ENODEV when setting PXP param to false or getting param when running on pxp-unsupported hw or getting param when i915 was built with CONFIG_PXP off Signed-off-by: Alan Previn Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Bommu Krishnaiah Cc: Rodrigo Vivi Cc: Chris Wilson Cc: Lionel Landwerlin Cc: Jason Ekstrand Cc: Daniel Vetter Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-11-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 95 +++++++++++++++++++---- drivers/gpu/drm/i915/gem/i915_gem_context.h | 6 ++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 28 +++++++ drivers/gpu/drm/i915/gem/i915_gem_create.c | 72 ++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 18 +++++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 ++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++- drivers/gpu/drm/i915/gem/selftests/mock_context.c | 4 +- drivers/gpu/drm/i915/pxp/intel_pxp.c | 77 ++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp.h | 12 +++ drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 6 ++ drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 9 +++ include/uapi/drm/i915_drm.h | 94 ++++++++++++++++++++++ 14 files changed, 402 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 8208fd5b72c3..08360d6bbd1e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -77,6 +77,8 @@ #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" +#include "pxp/intel_pxp.h" + #include "i915_gem_context.h" #include "i915_trace.h" #include "i915_user_extensions.h" @@ -186,10 +188,13 @@ static int validate_priority(struct drm_i915_private *i915, return 0; } -static void proto_context_close(struct i915_gem_proto_context *pc) +static void proto_context_close(struct drm_i915_private *i915, + struct i915_gem_proto_context *pc) { int i; + if (pc->pxp_wakeref) + intel_runtime_pm_put(&i915->runtime_pm, pc->pxp_wakeref); if (pc->vm) i915_vm_put(pc->vm); if (pc->user_engines) { @@ -241,6 +246,33 @@ static int proto_context_set_persistence(struct drm_i915_private *i915, return 0; } +static int proto_context_set_protected(struct drm_i915_private *i915, + struct i915_gem_proto_context *pc, + bool protected) +{ + int ret = 0; + + if (!protected) { + pc->uses_protected_content = false; + } else if (!intel_pxp_is_enabled(&i915->gt.pxp)) { + ret = -ENODEV; + } else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) || + !(pc->user_flags & BIT(UCONTEXT_BANNABLE))) { + ret = -EPERM; + } else { + pc->uses_protected_content = true; + + /* + * protected context usage requires the PXP session to be up, + * which in turn requires the device to be active. + */ + pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm); + ret = intel_pxp_wait_for_arb_start(&i915->gt.pxp); + } + + return ret; +} + static struct i915_gem_proto_context * proto_context_create(struct drm_i915_private *i915, unsigned int flags) { @@ -269,7 +301,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) return pc; proto_close: - proto_context_close(pc); + proto_context_close(i915, pc); return err; } @@ -693,6 +725,8 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, ret = -EPERM; else if (args->value) pc->user_flags |= BIT(UCONTEXT_BANNABLE); + else if (pc->uses_protected_content) + ret = -EPERM; else pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; @@ -700,10 +734,12 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; - else if (args->value) - pc->user_flags |= BIT(UCONTEXT_RECOVERABLE); - else + else if (!args->value) pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE); + else if (pc->uses_protected_content) + ret = -EPERM; + else + pc->user_flags |= BIT(UCONTEXT_RECOVERABLE); break; case I915_CONTEXT_PARAM_PRIORITY: @@ -731,6 +767,11 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, args->value); break; + case I915_CONTEXT_PARAM_PROTECTED_CONTENT: + ret = proto_context_set_protected(fpriv->dev_priv, pc, + args->value); + break; + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: @@ -956,6 +997,9 @@ static void i915_gem_context_release_work(struct work_struct *work) if (vm) i915_vm_put(vm); + if (ctx->pxp_wakeref) + intel_runtime_pm_put(&ctx->i915->runtime_pm, ctx->pxp_wakeref); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1338,6 +1382,11 @@ i915_gem_create_context(struct drm_i915_private *i915, goto err_engines; } + if (pc->uses_protected_content) { + ctx->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm); + ctx->uses_protected_content = true; + } + trace_i915_context_create(ctx); return ctx; @@ -1409,7 +1458,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, } ctx = i915_gem_create_context(i915, pc); - proto_context_close(pc); + proto_context_close(i915, pc); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; @@ -1436,7 +1485,7 @@ void i915_gem_context_close(struct drm_file *file) unsigned long idx; xa_for_each(&file_priv->proto_context_xa, idx, pc) - proto_context_close(pc); + proto_context_close(file_priv->dev_priv, pc); xa_destroy(&file_priv->proto_context_xa); mutex_destroy(&file_priv->proto_context_lock); @@ -1731,6 +1780,15 @@ static int set_priority(struct i915_gem_context *ctx, return 0; } +static int get_protected(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + args->size = 0; + args->value = i915_gem_context_uses_protected_content(ctx); + + return 0; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1754,6 +1812,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = -EPERM; else if (args->value) i915_gem_context_set_bannable(ctx); + else if (i915_gem_context_uses_protected_content(ctx)) + ret = -EPERM; /* can't clear this for protected contexts */ else i915_gem_context_clear_bannable(ctx); break; @@ -1761,10 +1821,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; - else if (args->value) - i915_gem_context_set_recoverable(ctx); - else + else if (!args->value) i915_gem_context_clear_recoverable(ctx); + else if (i915_gem_context_uses_protected_content(ctx)) + ret = -EPERM; /* can't set this for protected contexts */ + else + i915_gem_context_set_recoverable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: @@ -1779,6 +1841,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_PROTECTED_CONTENT: case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: @@ -1857,7 +1920,7 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); - proto_context_close(pc); + proto_context_close(file_priv->dev_priv, pc); /* One for the xarray and one for the caller */ return i915_gem_context_get(ctx); @@ -1943,7 +2006,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, goto err_pc; } - proto_context_close(ext_data.pc); + proto_context_close(i915, ext_data.pc); gem_context_register(ctx, ext_data.fpriv, id); } else { ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); @@ -1957,7 +2020,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return 0; err_pc: - proto_context_close(ext_data.pc); + proto_context_close(i915, ext_data.pc); return ret; } @@ -1988,7 +2051,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, GEM_WARN_ON(ctx && pc); if (pc) - proto_context_close(pc); + proto_context_close(file_priv->dev_priv, pc); if (ctx) context_close(ctx); @@ -2106,6 +2169,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_persistent(ctx); break; + case I915_CONTEXT_PARAM_PROTECTED_CONTENT: + ret = get_protected(ctx, args); + break; + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_ENGINES: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index d3279086a5e7..babfecb17ad1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -108,6 +108,12 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } +static inline bool +i915_gem_context_uses_protected_content(const struct i915_gem_context *ctx) +{ + return ctx->uses_protected_content; +} + /* i915_gem_context.c */ void i915_gem_init__contexts(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index c4617e4d9fa9..a627b09c4680 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -198,6 +198,12 @@ struct i915_gem_proto_context { /** @single_timeline: See See &i915_gem_context.syncobj */ bool single_timeline; + + /** @uses_protected_content: See &i915_gem_context.uses_protected_content */ + bool uses_protected_content; + + /** @pxp_wakeref: See &i915_gem_context.pxp_wakeref */ + intel_wakeref_t pxp_wakeref; }; /** @@ -321,6 +327,28 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** + * @uses_protected_content: context uses PXP-encrypted objects. + * + * This flag can only be set at ctx creation time and it's immutable for + * the lifetime of the context. See I915_CONTEXT_PARAM_PROTECTED_CONTENT + * in uapi/drm/i915_drm.h for more info on setting restrictions and + * expected behaviour of marked contexts. + */ + bool uses_protected_content; + + /** + * @pxp_wakeref: wakeref to keep the device awake when PXP is in use + * + * PXP sessions are invalidated when the device is suspended, which in + * turns invalidates all contexts and objects using it. To keep the + * flow simple, we keep the device awake when contexts using PXP objects + * are in use. It is expected that the userspace application only uses + * PXP when the display is on, so taking a wakeref here shouldn't worsen + * our power metrics. + */ + intel_wakeref_t pxp_wakeref; + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 1d341b8c47c0..8955d6abcef1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -6,6 +6,7 @@ #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" +#include "pxp/intel_pxp.h" #include "i915_drv.h" #include "i915_trace.h" @@ -82,21 +83,11 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj, return 0; } -/** - * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT - * @i915: i915 private - * @size: size of the buffer, in bytes - * @placements: possible placement regions, in priority order - * @n_placements: number of possible placement regions - * - * This function is exposed primarily for selftests and does very little - * error checking. It is assumed that the set of placement regions has - * already been verified to be valid. - */ -struct drm_i915_gem_object * -__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, - struct intel_memory_region **placements, - unsigned int n_placements) +static struct drm_i915_gem_object * +__i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements, + unsigned int ext_flags) { struct intel_memory_region *mr = placements[0]; struct drm_i915_gem_object *obj; @@ -135,6 +126,9 @@ __i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, GEM_BUG_ON(size != obj->base.size); + /* Add any flag set by create_ext options */ + obj->flags |= ext_flags; + trace_i915_gem_object_create(obj); return obj; @@ -145,6 +139,26 @@ object_free: return ERR_PTR(ret); } +/** + * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT + * @i915: i915 private + * @size: size of the buffer, in bytes + * @placements: possible placement regions, in priority order + * @n_placements: number of possible placement regions + * + * This function is exposed primarily for selftests and does very little + * error checking. It is assumed that the set of placement regions has + * already been verified to be valid. + */ +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements) +{ + return __i915_gem_object_create_user_ext(i915, size, placements, + n_placements, 0); +} + int i915_gem_dumb_create(struct drm_file *file, struct drm_device *dev, @@ -224,6 +238,7 @@ struct create_ext { struct drm_i915_private *i915; struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; unsigned int n_placements; + unsigned long flags; }; static void repr_placements(char *buf, size_t size, @@ -353,8 +368,28 @@ static int ext_set_placements(struct i915_user_extension __user *base, return set_placements(&ext, data); } +static int ext_set_protected(struct i915_user_extension __user *base, void *data) +{ + struct drm_i915_gem_create_ext_protected_content ext; + struct create_ext *ext_data = data; + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + if (ext.flags) + return -EINVAL; + + if (!intel_pxp_is_enabled(&ext_data->i915->gt.pxp)) + return -ENODEV; + + ext_data->flags |= I915_BO_PROTECTED; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, + [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, }; /** @@ -389,9 +424,10 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, ext_data.n_placements = 1; } - obj = __i915_gem_object_create_user(i915, args->size, - ext_data.placements, - ext_data.n_placements); + obj = __i915_gem_object_create_user_ext(i915, args->size, + ext_data.placements, + ext_data.n_placements, + ext_data.flags); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c2f74dba0557..9447ce6ea500 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -21,6 +21,8 @@ #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" +#include "pxp/intel_pxp.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" @@ -810,6 +812,22 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) if (unlikely(!obj)) return ERR_PTR(-ENOENT); + /* + * If the user has opted-in for protected-object tracking, make + * sure the object encryption can be used. + * We only need to do this when the object is first used with + * this context, because the context itself will be banned when + * the protected objects become invalid. + */ + if (i915_gem_context_uses_protected_content(eb->gem_context) && + i915_gem_object_is_protected(obj)) { + err = intel_pxp_key_check(&vm->gt->pxp, obj); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + } + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index b88b121e244a..76ce6a1500bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -25,6 +25,7 @@ #include #include "display/intel_frontbuffer.h" +#include "pxp/intel_pxp.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7f9f2e5ba0ec..9df3ee60604e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -272,6 +272,12 @@ i915_gem_object_clear_tiling_quirk(struct drm_i915_gem_object *obj) clear_bit(I915_TILING_QUIRK_BIT, &obj->flags); } +static inline bool +i915_gem_object_is_protected(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_PROTECTED; +} + static inline bool i915_gem_object_type_has(const struct drm_i915_gem_object *obj, unsigned long flags) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index fa2ba9e2a4d0..7c3da4e3e737 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -304,7 +304,7 @@ struct drm_i915_gem_object { I915_BO_ALLOC_PM_EARLY) #define I915_BO_READONLY BIT(6) #define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */ - +#define I915_BO_PROTECTED BIT(8) /** * @mem_flags - Mutable placement-related flags * @@ -544,6 +544,13 @@ struct drm_i915_gem_object { bool created:1; } ttm; + /* + * Record which PXP key instance this object was created against (if + * any), so we can use it to determine if the encryption is valid by + * comparing against the current key instance. + */ + u32 pxp_key_instance; + /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 067d68a6fe4c..c0a8ef368044 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -88,7 +88,7 @@ live_context(struct drm_i915_private *i915, struct file *file) return ERR_CAST(pc); ctx = i915_gem_create_context(i915, pc); - proto_context_close(pc); + proto_context_close(i915, pc); if (IS_ERR(ctx)) return ctx; @@ -163,7 +163,7 @@ kernel_context(struct drm_i915_private *i915, } ctx = i915_gem_create_context(i915, pc); - proto_context_close(pc); + proto_context_close(i915, pc); if (IS_ERR(ctx)) return ctx; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 2e188217c3b7..ee507fd8ae19 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -7,6 +7,7 @@ #include "intel_pxp_irq.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" +#include "gem/i915_gem_context.h" #include "gt/intel_context.h" #include "i915_drv.h" @@ -178,3 +179,79 @@ void intel_pxp_fini_hw(struct intel_pxp *pxp) intel_pxp_irq_disable(pxp); } + +int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj) +{ + if (!intel_pxp_is_active(pxp)) + return -ENODEV; + + if (!i915_gem_object_is_protected(obj)) + return -EINVAL; + + GEM_BUG_ON(!pxp->key_instance); + + /* + * If this is the first time we're using this object, it's not + * encrypted yet; it will be encrypted with the current key, so mark it + * as such. If the object is already encrypted, check instead if the + * used key is still valid. + */ + if (!obj->pxp_key_instance) + obj->pxp_key_instance = pxp->key_instance; + else if (obj->pxp_key_instance != pxp->key_instance) + return -ENOEXEC; + + return 0; +} + +void intel_pxp_invalidate(struct intel_pxp *pxp) +{ + struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct i915_gem_context *ctx, *cn; + + /* ban all contexts marked as protected */ + spin_lock_irq(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { + struct i915_gem_engines_iter it; + struct intel_context *ce; + + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + if (likely(!i915_gem_context_uses_protected_content(ctx))) { + i915_gem_context_put(ctx); + continue; + } + + spin_unlock_irq(&i915->gem.contexts.lock); + + /* + * By the time we get here we are either going to suspend with + * quiesced execution or the HW keys are already long gone and + * in this case it is worthless to attempt to close the context + * and wait for its execution. It will hang the GPU if it has + * not already. So, as a fast mitigation, we can ban the + * context as quick as we can. That might race with the + * execbuffer, but currently this is the best that can be done. + */ + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + intel_context_ban(ce, NULL); + i915_gem_context_unlock_engines(ctx); + + /* + * The context has been banned, no need to keep the wakeref. + * This is safe from races because the only other place this + * is touched is context_release and we're holding a ctx ref + */ + if (ctx->pxp_wakeref) { + intel_runtime_pm_put(&i915->runtime_pm, + ctx->pxp_wakeref); + ctx->pxp_wakeref = 0; + } + + spin_lock_irq(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); + } + spin_unlock_irq(&i915->gem.contexts.lock); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 54b268280379..430f01ea9860 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -8,6 +8,8 @@ #include "intel_pxp_types.h" +struct drm_i915_gem_object; + static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) { return pxp->ce; @@ -25,6 +27,10 @@ void intel_pxp_fini_hw(struct intel_pxp *pxp); void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp); + +int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj); + +void intel_pxp_invalidate(struct intel_pxp *pxp); #else static inline void intel_pxp_init(struct intel_pxp *pxp) { @@ -38,6 +44,12 @@ static inline int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp) { return -ENODEV; } + +static inline int intel_pxp_key_check(struct intel_pxp *pxp, + struct drm_i915_gem_object *obj) +{ + return -ENODEV; +} #endif #endif /* __INTEL_PXP_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 67c30e534d50..c6a5e4197e40 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -72,6 +72,9 @@ static int pxp_create_arb_session(struct intel_pxp *pxp) return ret; } + if (!++pxp->key_instance) + ++pxp->key_instance; + pxp->arb_is_valid = true; return 0; @@ -85,6 +88,9 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) /* must mark termination in progress calling this function */ GEM_WARN_ON(pxp->arb_is_valid); + /* invalidate protected objects */ + intel_pxp_invalidate(pxp); + /* terminate the hw sessions */ ret = intel_pxp_terminate_session(pxp, ARB_SESSION); if (ret) { diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 5a170e43c959..c394ab2e452b 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -7,7 +7,9 @@ #define __INTEL_PXP_TYPES_H__ #include +#include #include +#include #include #include @@ -27,6 +29,13 @@ struct intel_pxp { */ bool arb_is_valid; + /* + * Keep track of which key instance we're on, so we can use it to + * determine if an object was created using the current key or a + * previous one. + */ + u32 key_instance; + struct mutex tee_mutex; /* protects the tee channel binding */ /* diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 920e9e852e5a..aa2a7eccfb94 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1846,6 +1846,55 @@ struct drm_i915_gem_context_param { * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc + +/* + * I915_CONTEXT_PARAM_PROTECTED_CONTENT: + * + * Mark that the context makes use of protected content, which will result + * in the context being invalidated when the protected content session is. + * Given that the protected content session is killed on suspend, the device + * is kept awake for the lifetime of a protected context, so the user should + * make sure to dispose of them once done. + * This flag can only be set at context creation time and, when set to true, + * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE + * to false. This flag can't be set to true in conjunction with setting the + * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: + * + * .. code-block:: C + * + * struct drm_i915_gem_context_create_ext_setparam p_protected = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, + * .value = 1, + * } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_norecover = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * .next_extension = to_user_pointer(&p_protected), + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_RECOVERABLE, + * .value = 0, + * } + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_norecover); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * In addition to the normal failure cases, setting this flag during context + * creation can result in the following errors: + * + * -ENODEV: feature not available + * -EPERM: trying to mark a recoverable or not bannable context as protected + */ +#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ __u64 value; @@ -2979,8 +3028,12 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see * struct drm_i915_gem_create_ext_memory_regions. + * + * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see + * struct drm_i915_gem_create_ext_protected_content. */ #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 +#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 __u64 extensions; }; @@ -3038,6 +3091,47 @@ struct drm_i915_gem_create_ext_memory_regions { __u64 regions; }; +/** + * struct drm_i915_gem_create_ext_protected_content - The + * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. + * + * If this extension is provided, buffer contents are expected to be protected + * by PXP encryption and require decryption for scan out and processing. This + * is only possible on platforms that have PXP enabled, on all other scenarios + * using this extension will cause the ioctl to fail and return -ENODEV. The + * flags parameter is reserved for future expansion and must currently be set + * to zero. + * + * The buffer contents are considered invalid after a PXP session teardown. + * + * The encryption is guaranteed to be processed correctly only if the object + * is submitted with a context created using the + * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks + * at submission time on the validity of the objects involved. + * + * Below is an example on how to create a protected object: + * + * .. code-block:: C + * + * struct drm_i915_gem_create_ext_protected_content protected_ext = { + * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, + * .flags = 0, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = PAGE_SIZE, + * .extensions = (uintptr_t)&protected_ext, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + */ +struct drm_i915_gem_create_ext_protected_content { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + /** @flags: reserved for future usage, currently MBZ */ + __u32 flags; +}; + /* ID of the protected content session managed by i915 when PXP is active */ #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf -- cgit v1.2.3-70-g09d2 From 32271ecd6596e67458c75d2d61805bb1c60d7363 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:46 -0700 Subject: drm/i915/pxp: start the arb session on demand Now that we can handle destruction and re-creation of the arb session, we can postpone the start of the session to the first submission that requires it, to avoid keeping it running with no user. v10: increase timeout when waiting in intel_pxp_start as firmware session startup is slower right after boot. v13: increase the same timeout by 50 milisec because previous timeout was not enough to cover two lower level 100 milisec timeouts in the session termination + creation steps. Signed-off-by: Alan Previn Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-12-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 ++- drivers/gpu/drm/i915/pxp/intel_pxp.c | 37 ++++++++++++++++++---------- drivers/gpu/drm/i915/pxp/intel_pxp.h | 10 ++++++-- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 2 +- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 6 ++--- drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 10 +------- drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 2 ++ 7 files changed, 42 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 08360d6bbd1e..51861a66547c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -267,7 +267,9 @@ static int proto_context_set_protected(struct drm_i915_private *i915, * which in turn requires the device to be active. */ pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ret = intel_pxp_wait_for_arb_start(&i915->gt.pxp); + + if (!intel_pxp_is_active(&i915->gt.pxp)) + ret = intel_pxp_start(&i915->gt.pxp); } return ret; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index ee507fd8ae19..7f241c0218c1 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -91,6 +91,7 @@ void intel_pxp_init(struct intel_pxp *pxp) init_completion(&pxp->termination); complete_all(&pxp->termination); + mutex_init(&pxp->arb_mutex); INIT_WORK(&pxp->session_work, intel_pxp_session_work); ret = create_vcs_context(pxp); @@ -127,7 +128,7 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp) reinit_completion(&pxp->termination); } -static void intel_pxp_queue_termination(struct intel_pxp *pxp) +static void pxp_queue_termination(struct intel_pxp *pxp) { struct intel_gt *gt = pxp_to_gt(pxp); @@ -146,31 +147,41 @@ static void intel_pxp_queue_termination(struct intel_pxp *pxp) * the arb session is restarted from the irq work when we receive the * termination completion interrupt */ -int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp) +int intel_pxp_start(struct intel_pxp *pxp) { + int ret = 0; + if (!intel_pxp_is_enabled(pxp)) - return 0; + return -ENODEV; + + mutex_lock(&pxp->arb_mutex); + + if (pxp->arb_is_valid) + goto unlock; + + pxp_queue_termination(pxp); if (!wait_for_completion_timeout(&pxp->termination, - msecs_to_jiffies(100))) - return -ETIMEDOUT; + msecs_to_jiffies(250))) { + ret = -ETIMEDOUT; + goto unlock; + } + + /* make sure the compiler doesn't optimize the double access */ + barrier(); if (!pxp->arb_is_valid) - return -EIO; + ret = -EIO; - return 0; +unlock: + mutex_unlock(&pxp->arb_mutex); + return ret; } void intel_pxp_init_hw(struct intel_pxp *pxp) { kcr_pxp_enable(pxp_to_gt(pxp)); intel_pxp_irq_enable(pxp); - - /* - * the session could've been attacked while we weren't loaded, so - * handle it as if it was and re-create it. - */ - intel_pxp_queue_termination(pxp); } void intel_pxp_fini_hw(struct intel_pxp *pxp) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 430f01ea9860..bce0014d9ff9 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -26,7 +26,8 @@ void intel_pxp_init_hw(struct intel_pxp *pxp); void intel_pxp_fini_hw(struct intel_pxp *pxp); void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); -int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp); + +int intel_pxp_start(struct intel_pxp *pxp); int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj); @@ -40,11 +41,16 @@ static inline void intel_pxp_fini(struct intel_pxp *pxp) { } -static inline int intel_pxp_wait_for_arb_start(struct intel_pxp *pxp) +static inline int intel_pxp_start(struct intel_pxp *pxp) { return -ENODEV; } +static inline bool intel_pxp_is_active(const struct intel_pxp *pxp) +{ + return false; +} + static inline int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c index 300cf2b96fda..7b25efa82b76 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c @@ -32,7 +32,7 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) { /* immediately mark PXP as inactive on termination */ intel_pxp_mark_termination_in_progress(pxp); - pxp->session_events |= PXP_TERMINATION_REQUEST; + pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED; } if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index c6a5e4197e40..a95cc443a48d 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -88,9 +88,6 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) /* must mark termination in progress calling this function */ GEM_WARN_ON(pxp->arb_is_valid); - /* invalidate protected objects */ - intel_pxp_invalidate(pxp); - /* terminate the hw sessions */ ret = intel_pxp_terminate_session(pxp, ARB_SESSION); if (ret) { @@ -147,6 +144,9 @@ void intel_pxp_session_work(struct work_struct *work) if (!events) return; + if (events & PXP_INVAL_REQUIRED) + intel_pxp_invalidate(pxp); + if (events & PXP_TERMINATION_REQUEST) { events &= ~PXP_TERMINATION_COMPLETE; pxp_terminate(pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index 494bd6b48514..3fc3ddfd02b3 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -78,9 +78,7 @@ unlock: static int i915_pxp_tee_component_bind(struct device *i915_kdev, struct device *tee_kdev, void *data) { - struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); - int ret; mutex_lock(&pxp->tee_mutex); pxp->pxp_component = data; @@ -89,14 +87,8 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, /* the component is required to fully start the PXP HW */ intel_pxp_init_hw(pxp); - ret = intel_pxp_wait_for_arb_start(pxp); - if (ret) { - drm_err(&i915->drm, "Failed to create arb session during bind\n"); - intel_pxp_fini_hw(pxp); - pxp->pxp_component = NULL; - } - return ret; + return 0; } static void i915_pxp_tee_component_unbind(struct device *i915_kdev, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index c394ab2e452b..ae24064bb57e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -27,6 +27,7 @@ struct intel_pxp { * even if the keys are gone, so we can't rely on the HW state of the * session to know if it's valid and need to track the status in SW. */ + struct mutex arb_mutex; /* protects arb session start */ bool arb_is_valid; /* @@ -53,6 +54,7 @@ struct intel_pxp { u32 session_events; /* protected with gt->irq_lock */ #define PXP_TERMINATION_REQUEST BIT(0) #define PXP_TERMINATION_COMPLETE BIT(1) +#define PXP_INVAL_REQUIRED BIT(2) }; #endif /* __INTEL_PXP_TYPES_H__ */ -- cgit v1.2.3-70-g09d2 From 0cfab4cb3c4e90c1c2f4b18c0b8ac4cb946808c8 Mon Sep 17 00:00:00 2001 From: "Huang, Sean Z" Date: Fri, 24 Sep 2021 12:14:47 -0700 Subject: drm/i915/pxp: Enable PXP power management During the power event S3+ sleep/resume, hardware will lose all the encryption keys for every hardware session, even though the session state might still be marked as alive after resume. Therefore, we should consider the session as dead on suspend and invalidate all the objects. The session will be automatically restarted on the first protected submission on resume. v2: runtime suspend also invalidates the keys v3: fix return codes, simplify rpm ops (Chris), use the new worker func v4: invalidate the objects on suspend, don't re-create the arb sesson on resume (delayed to first submission). v5: move irq changes back to irq patch (Rodrigo) v6: drop invalidation in runtime suspend (Rodrigo) Signed-off-by: Huang, Sean Z Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-13-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_gt_pm.c | 16 +++++++++- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 1 + drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 46 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_pm.h | 24 +++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 38 +++++++++++++++++------ drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 9 ++++++ 8 files changed, 126 insertions(+), 11 deletions(-) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_pm.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_pm.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5363d3b3fda7..dadf879b228c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -283,6 +283,7 @@ i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp.o \ pxp/intel_pxp_cmd.o \ pxp/intel_pxp_irq.o \ + pxp/intel_pxp_pm.o \ pxp/intel_pxp_session.o \ pxp/intel_pxp_tee.o diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index e9da36530b45..524eaf678790 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -18,6 +18,7 @@ #include "intel_rc6.h" #include "intel_rps.h" #include "intel_wakeref.h" +#include "pxp/intel_pxp_pm.h" #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) @@ -264,6 +265,8 @@ int intel_gt_resume(struct intel_gt *gt) intel_uc_resume(>->uc); + intel_pxp_resume(>->pxp); + user_forcewake(gt, false); out_fw: @@ -297,6 +300,8 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) { user_forcewake(gt, true); wait_for_suspend(gt); + + intel_pxp_suspend(>->pxp, false); } static suspend_state_t pm_suspend_target(void) @@ -348,6 +353,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) void intel_gt_runtime_suspend(struct intel_gt *gt) { + intel_pxp_suspend(>->pxp, true); intel_uc_runtime_suspend(>->uc); GT_TRACE(gt, "\n"); @@ -355,11 +361,19 @@ void intel_gt_runtime_suspend(struct intel_gt *gt) int intel_gt_runtime_resume(struct intel_gt *gt) { + int ret; + GT_TRACE(gt, "\n"); intel_gt_init_swizzling(gt); intel_ggtt_restore_fences(gt->ggtt); - return intel_uc_runtime_resume(>->uc); + ret = intel_uc_runtime_resume(>->uc); + if (ret) + return ret; + + intel_pxp_resume(>->pxp); + + return 0; } static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ed7b421cad44..d24209f5f37b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -67,6 +67,8 @@ #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" +#include "pxp/intel_pxp_pm.h" + #include "i915_debugfs.h" #include "i915_drv.h" #include "i915_ioc32.h" diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c index 7b25efa82b76..8d5553772ded 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c @@ -10,6 +10,7 @@ #include "gt/intel_gt_types.h" #include "i915_irq.h" #include "i915_reg.h" +#include "intel_runtime_pm.h" /** * intel_pxp_irq_handler - Handles PXP interrupts. diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c new file mode 100644 index 000000000000..23fd86de5a24 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2020 Intel Corporation. + */ + +#include "intel_pxp.h" +#include "intel_pxp_irq.h" +#include "intel_pxp_pm.h" +#include "intel_pxp_session.h" + +void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime) +{ + if (!intel_pxp_is_enabled(pxp)) + return; + + pxp->arb_is_valid = false; + + /* + * Contexts using protected objects keep a runtime PM reference, so we + * can only runtime suspend when all of them have been either closed + * or banned. Therefore, there is no need to invalidate in that + * scenario. + */ + if (!runtime) + intel_pxp_invalidate(pxp); + + intel_pxp_fini_hw(pxp); + + pxp->hw_state_invalidated = false; +} + +void intel_pxp_resume(struct intel_pxp *pxp) +{ + if (!intel_pxp_is_enabled(pxp)) + return; + + /* + * The PXP component gets automatically unbound when we go into S3 and + * re-bound after we come out, so in that scenario we can defer the + * hw init to the bind call. + */ + if (!pxp->pxp_component) + return; + + intel_pxp_init_hw(pxp); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h new file mode 100644 index 000000000000..c89e97a0c3d0 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_PM_H__ +#define __INTEL_PXP_PM_H__ + +#include "intel_pxp_types.h" + +#ifdef CONFIG_DRM_I915_PXP +void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime); +void intel_pxp_resume(struct intel_pxp *pxp); +#else +static inline void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime) +{ +} + +static inline void intel_pxp_resume(struct intel_pxp *pxp) +{ +} +#endif + +#endif /* __INTEL_PXP_PM_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index a95cc443a48d..d02732f04757 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -21,29 +21,36 @@ static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_uncore *uncore = pxp_to_gt(pxp)->uncore; intel_wakeref_t wakeref; u32 sip = 0; - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - sip = intel_uncore_read(gt->uncore, GEN12_KCR_SIP); + /* if we're suspended the session is considered off */ + with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) + sip = intel_uncore_read(uncore, GEN12_KCR_SIP); return sip & BIT(id); } static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_play) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_uncore *uncore = pxp_to_gt(pxp)->uncore; intel_wakeref_t wakeref; u32 mask = BIT(id); int ret; - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - ret = intel_wait_for_register(gt->uncore, - GEN12_KCR_SIP, - mask, - in_play ? mask : 0, - 100); + /* if we're suspended the session is considered off */ + wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm); + if (!wakeref) + return in_play ? -ENODEV : 0; + + ret = intel_wait_for_register(uncore, + GEN12_KCR_SIP, + mask, + in_play ? mask : 0, + 100); + + intel_runtime_pm_put(uncore->rpm, wakeref); return ret; } @@ -135,6 +142,7 @@ void intel_pxp_session_work(struct work_struct *work) { struct intel_pxp *pxp = container_of(work, typeof(*pxp), session_work); struct intel_gt *gt = pxp_to_gt(pxp); + intel_wakeref_t wakeref; u32 events = 0; spin_lock_irq(>->irq_lock); @@ -147,6 +155,14 @@ void intel_pxp_session_work(struct work_struct *work) if (events & PXP_INVAL_REQUIRED) intel_pxp_invalidate(pxp); + /* + * If we're processing an event while suspending then don't bother, + * we're going to re-init everything on resume anyway. + */ + wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); + if (!wakeref) + return; + if (events & PXP_TERMINATION_REQUEST) { events &= ~PXP_TERMINATION_COMPLETE; pxp_terminate(pxp); @@ -154,4 +170,6 @@ void intel_pxp_session_work(struct work_struct *work) if (events & PXP_TERMINATION_COMPLETE) pxp_terminate_complete(pxp); + + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index 3fc3ddfd02b3..49508f31dcb7 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -78,16 +78,25 @@ unlock: static int i915_pxp_tee_component_bind(struct device *i915_kdev, struct device *tee_kdev, void *data) { + struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + intel_wakeref_t wakeref; mutex_lock(&pxp->tee_mutex); pxp->pxp_component = data; pxp->pxp_component->tee_dev = tee_kdev; mutex_unlock(&pxp->tee_mutex); + /* if we are suspended, the HW will be re-initialized on resume */ + wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm); + if (!wakeref) + return 0; + /* the component is required to fully start the PXP HW */ intel_pxp_init_hw(pxp); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + return 0; } -- cgit v1.2.3-70-g09d2 From ef6ba31dd3840588418e70f4dd63ce6022e1a254 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Fri, 24 Sep 2021 12:14:48 -0700 Subject: drm/i915/pxp: Add plane decryption support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to enable/disable PLANE_SURF Decryption Request bit. It requires only to enable plane decryption support when following condition met. 1. PXP session is enabled. 2. Buffer object is protected. v2: - Used gen fb obj user_flags instead gem_object_metadata. [Krishna] v3: - intel_pxp_gem_object_status() API changes. v4: use intel_pxp_is_active (Daniele) v5: rebase and use the new protected object status checker (Daniele) v6: used plane state for plane_decryption to handle async flip as suggested by Ville. v7: check pxp session while plane decrypt state computation. [Ville] removed pointless code. [Ville] v8 (Daniele): update PXP check v9: move decrypt check after icl_check_nv12_planes() when overlays have fb set (Juston) v10 (Daniele): update PXP check again to match rework in earlier patches and don't consider protection valid if the object has not been used in an execbuf beforehand. Cc: Bommu Krishnaiah Cc: Huang Sean Z Cc: Gaurav Kumar Cc: Ville Syrjälä Signed-off-by: Anshuman Gupta Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Juston Li Reviewed-by: Rodrigo Vivi Reviewed-by: Uma Shankar #v9 Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-14-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 26 ++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ drivers/gpu/drm/i915/display/skl_universal_plane.c | 15 ++++++++++--- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/pxp/intel_pxp.c | 9 +++++--- drivers/gpu/drm/i915/pxp/intel_pxp.h | 7 ++++-- 7 files changed, 54 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 134a6acbd8fb..b9b4f1fbb8ac 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -70,6 +70,8 @@ #include "gt/intel_rps.h" #include "gt/gen8_ppgtt.h" +#include "pxp/intel_pxp.h" + #include "g4x_dp.h" #include "g4x_hdmi.h" #include "i915_drv.h" @@ -9627,13 +9629,23 @@ static int intel_bigjoiner_add_affected_planes(struct intel_atomic_state *state) return 0; } +static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + return intel_pxp_key_check(&i915->gt.pxp, obj, false) == 0; +} + static int intel_atomic_check_planes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_plane_state *plane_state; struct intel_plane *plane; + struct intel_plane_state *new_plane_state; + struct intel_plane_state *old_plane_state; struct intel_crtc *crtc; + const struct drm_framebuffer *fb; int i, ret; ret = icl_add_linked_planes(state); @@ -9681,6 +9693,16 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state) return ret; } + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + new_plane_state = intel_atomic_get_new_plane_state(state, plane); + old_plane_state = intel_atomic_get_old_plane_state(state, plane); + fb = new_plane_state->hw.fb; + if (fb) + new_plane_state->decrypt = bo_has_valid_encryption(intel_fb_obj(fb)); + else + new_plane_state->decrypt = old_plane_state->decrypt; + } + return 0; } @@ -9967,6 +9989,10 @@ static int intel_atomic_check_async(struct intel_atomic_state *state) drm_dbg_kms(&i915->drm, "Color range cannot be changed in async flip\n"); return -EINVAL; } + + /* plane decryption is allow to change only in synchronous flips */ + if (old_plane_state->decrypt != new_plane_state->decrypt) + return -EINVAL; } return 0; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6beeeeba1bed..53b5db3c6228 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -629,6 +629,9 @@ struct intel_plane_state { struct intel_fb_view view; + /* Plane pxp decryption state */ + bool decrypt; + /* plane control register */ u32 ctl; diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 724e7b04f3b6..55e3f093b951 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -18,6 +18,7 @@ #include "intel_sprite.h" #include "skl_scaler.h" #include "skl_universal_plane.h" +#include "pxp/intel_pxp.h" static const u32 skl_plane_formats[] = { DRM_FORMAT_C8, @@ -1024,7 +1025,7 @@ skl_program_plane(struct intel_plane *plane, u8 alpha = plane_state->hw.alpha >> 8; u32 plane_color_ctl = 0, aux_dist = 0; unsigned long irqflags; - u32 keymsk, keymax; + u32 keymsk, keymax, plane_surf; u32 plane_ctl = plane_state->ctl; plane_ctl |= skl_plane_ctl_crtc(crtc_state); @@ -1113,8 +1114,16 @@ skl_program_plane(struct intel_plane *plane, * the control register just before the surface register. */ intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), plane_ctl); - intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + surf_addr); + plane_surf = intel_plane_ggtt_offset(plane_state) + surf_addr; + + /* + * FIXME: pxp session invalidation can hit any time even at time of commit + * or after the commit, display content will be garbage. + */ + if (plane_state->decrypt) + plane_surf |= PLANE_SURF_DECRYPT; + + intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), plane_surf); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9447ce6ea500..8b3a25bd93e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -821,7 +821,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) */ if (i915_gem_context_uses_protected_content(eb->gem_context) && i915_gem_object_is_protected(obj)) { - err = intel_pxp_key_check(&vm->gt->pxp, obj); + err = intel_pxp_key_check(&vm->gt->pxp, obj, true); if (err) { i915_gem_object_put(obj); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e2647c02a4d2..4be684b0fcd4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7383,6 +7383,7 @@ enum { #define _PLANE_SURF_3(pipe) _PIPE(pipe, _PLANE_SURF_3_A, _PLANE_SURF_3_B) #define PLANE_SURF(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_SURF_1(pipe), _PLANE_SURF_2(pipe)) +#define PLANE_SURF_DECRYPT REG_BIT(2) #define _PLANE_OFFSET_1_B 0x711a4 #define _PLANE_OFFSET_2_B 0x712a4 diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 7f241c0218c1..b85b72f12726 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -191,7 +191,9 @@ void intel_pxp_fini_hw(struct intel_pxp *pxp) intel_pxp_irq_disable(pxp); } -int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj) +int intel_pxp_key_check(struct intel_pxp *pxp, + struct drm_i915_gem_object *obj, + bool assign) { if (!intel_pxp_is_active(pxp)) return -ENODEV; @@ -207,9 +209,10 @@ int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj) * as such. If the object is already encrypted, check instead if the * used key is still valid. */ - if (!obj->pxp_key_instance) + if (!obj->pxp_key_instance && assign) obj->pxp_key_instance = pxp->key_instance; - else if (obj->pxp_key_instance != pxp->key_instance) + + if (obj->pxp_key_instance != pxp->key_instance) return -ENOEXEC; return 0; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index bce0014d9ff9..aa262258d4d4 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -29,7 +29,9 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); int intel_pxp_start(struct intel_pxp *pxp); -int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj); +int intel_pxp_key_check(struct intel_pxp *pxp, + struct drm_i915_gem_object *obj, + bool assign); void intel_pxp_invalidate(struct intel_pxp *pxp); #else @@ -52,7 +54,8 @@ static inline bool intel_pxp_is_active(const struct intel_pxp *pxp) } static inline int intel_pxp_key_check(struct intel_pxp *pxp, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + bool assign) { return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 6eba56f64d5d5ea5f273557fe46e21799a60da99 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Fri, 24 Sep 2021 12:14:49 -0700 Subject: drm/i915/pxp: black pixels on pxp disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When protected sufaces has flipped and pxp session is disabled, display black pixels by using plane color CTM correction. v2: - Display black pixels in async flip too. v3: - Removed the black pixels logic for async flip. [Ville] - Used plane state to force black pixels. [Ville] v4 (Daniele): update pxp_is_borked check. v5: rebase on top of v9 plane decryption moving the decrypt check (Juston) Cc: Ville Syrjälä Cc: Gaurav Kumar Cc: Shankar Uma Signed-off-by: Anshuman Gupta Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Juston Li Reviewed-by: Rodrigo Vivi Reviewed-by: Uma Shankar Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-15-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 12 +++++- drivers/gpu/drm/i915/display/intel_display_types.h | 3 ++ drivers/gpu/drm/i915/display/skl_universal_plane.c | 36 ++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 46 ++++++++++++++++++++++ 4 files changed, 94 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b9b4f1fbb8ac..85d0cce4d0c7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -9636,6 +9636,11 @@ static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj) return intel_pxp_key_check(&i915->gt.pxp, obj, false) == 0; } +static bool pxp_is_borked(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_is_protected(obj) && !bo_has_valid_encryption(obj); +} + static int intel_atomic_check_planes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -9697,10 +9702,13 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state) new_plane_state = intel_atomic_get_new_plane_state(state, plane); old_plane_state = intel_atomic_get_old_plane_state(state, plane); fb = new_plane_state->hw.fb; - if (fb) + if (fb) { new_plane_state->decrypt = bo_has_valid_encryption(intel_fb_obj(fb)); - else + new_plane_state->force_black = pxp_is_borked(intel_fb_obj(fb)); + } else { new_plane_state->decrypt = old_plane_state->decrypt; + new_plane_state->force_black = old_plane_state->force_black; + } } return 0; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 53b5db3c6228..0fe03ef57023 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -632,6 +632,9 @@ struct intel_plane_state { /* Plane pxp decryption state */ bool decrypt; + /* Plane state to display black pixels when pxp is borked */ + bool force_black; + /* plane control register */ u32 ctl; diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 55e3f093b951..c4adcb3e12b3 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1002,6 +1002,33 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, } } +static void intel_load_plane_csc_black(struct intel_plane *intel_plane) +{ + struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); + enum pipe pipe = intel_plane->pipe; + enum plane_id plane = intel_plane->id; + u16 postoff = 0; + + drm_dbg_kms(&dev_priv->drm, "plane color CTM to black %s:%d\n", + intel_plane->base.name, plane); + intel_de_write_fw(dev_priv, PLANE_CSC_COEFF(pipe, plane, 0), 0); + intel_de_write_fw(dev_priv, PLANE_CSC_COEFF(pipe, plane, 1), 0); + + intel_de_write_fw(dev_priv, PLANE_CSC_COEFF(pipe, plane, 2), 0); + intel_de_write_fw(dev_priv, PLANE_CSC_COEFF(pipe, plane, 3), 0); + + intel_de_write_fw(dev_priv, PLANE_CSC_COEFF(pipe, plane, 4), 0); + intel_de_write_fw(dev_priv, PLANE_CSC_COEFF(pipe, plane, 5), 0); + + intel_de_write_fw(dev_priv, PLANE_CSC_PREOFF(pipe, plane, 0), 0); + intel_de_write_fw(dev_priv, PLANE_CSC_PREOFF(pipe, plane, 1), 0); + intel_de_write_fw(dev_priv, PLANE_CSC_PREOFF(pipe, plane, 2), 0); + + intel_de_write_fw(dev_priv, PLANE_CSC_POSTOFF(pipe, plane, 0), postoff); + intel_de_write_fw(dev_priv, PLANE_CSC_POSTOFF(pipe, plane, 1), postoff); + intel_de_write_fw(dev_priv, PLANE_CSC_POSTOFF(pipe, plane, 2), postoff); +} + static void skl_program_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -1115,14 +1142,21 @@ skl_program_plane(struct intel_plane *plane, */ intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), plane_ctl); plane_surf = intel_plane_ggtt_offset(plane_state) + surf_addr; + plane_color_ctl = intel_de_read_fw(dev_priv, PLANE_COLOR_CTL(pipe, plane_id)); /* * FIXME: pxp session invalidation can hit any time even at time of commit * or after the commit, display content will be garbage. */ - if (plane_state->decrypt) + if (plane_state->decrypt) { plane_surf |= PLANE_SURF_DECRYPT; + } else if (plane_state->force_black) { + intel_load_plane_csc_black(plane); + plane_color_ctl |= PLANE_COLOR_PLANE_CSC_ENABLE; + } + intel_de_write_fw(dev_priv, PLANE_COLOR_CTL(pipe, plane_id), + plane_color_ctl); intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), plane_surf); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4be684b0fcd4..47bfffe0b4de 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7260,6 +7260,7 @@ enum { #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ #define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-ICL */ #define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) +#define PLANE_COLOR_PLANE_CSC_ENABLE REG_BIT(21) /* ICL+ */ #define PLANE_COLOR_INPUT_CSC_ENABLE (1 << 20) /* ICL+ */ #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) @@ -11377,6 +11378,51 @@ enum skl_power_gate { _PAL_PREC_MULTI_SEG_DATA_A, \ _PAL_PREC_MULTI_SEG_DATA_B) +#define _MMIO_PLANE_GAMC(plane, i, a, b) _MMIO(_PIPE(plane, a, b) + (i) * 4) + +/* Plane CSC Registers */ +#define _PLANE_CSC_RY_GY_1_A 0x70210 +#define _PLANE_CSC_RY_GY_2_A 0x70310 + +#define _PLANE_CSC_RY_GY_1_B 0x71210 +#define _PLANE_CSC_RY_GY_2_B 0x71310 + +#define _PLANE_CSC_RY_GY_1(pipe) _PIPE(pipe, _PLANE_CSC_RY_GY_1_A, \ + _PLANE_CSC_RY_GY_1_B) +#define _PLANE_CSC_RY_GY_2(pipe) _PIPE(pipe, _PLANE_INPUT_CSC_RY_GY_2_A, \ + _PLANE_INPUT_CSC_RY_GY_2_B) +#define PLANE_CSC_COEFF(pipe, plane, index) _MMIO_PLANE(plane, \ + _PLANE_CSC_RY_GY_1(pipe) + (index) * 4, \ + _PLANE_CSC_RY_GY_2(pipe) + (index) * 4) + +#define _PLANE_CSC_PREOFF_HI_1_A 0x70228 +#define _PLANE_CSC_PREOFF_HI_2_A 0x70328 + +#define _PLANE_CSC_PREOFF_HI_1_B 0x71228 +#define _PLANE_CSC_PREOFF_HI_2_B 0x71328 + +#define _PLANE_CSC_PREOFF_HI_1(pipe) _PIPE(pipe, _PLANE_CSC_PREOFF_HI_1_A, \ + _PLANE_CSC_PREOFF_HI_1_B) +#define _PLANE_CSC_PREOFF_HI_2(pipe) _PIPE(pipe, _PLANE_CSC_PREOFF_HI_2_A, \ + _PLANE_CSC_PREOFF_HI_2_B) +#define PLANE_CSC_PREOFF(pipe, plane, index) _MMIO_PLANE(plane, _PLANE_CSC_PREOFF_HI_1(pipe) + \ + (index) * 4, _PLANE_CSC_PREOFF_HI_2(pipe) + \ + (index) * 4) + +#define _PLANE_CSC_POSTOFF_HI_1_A 0x70234 +#define _PLANE_CSC_POSTOFF_HI_2_A 0x70334 + +#define _PLANE_CSC_POSTOFF_HI_1_B 0x71234 +#define _PLANE_CSC_POSTOFF_HI_2_B 0x71334 + +#define _PLANE_CSC_POSTOFF_HI_1(pipe) _PIPE(pipe, _PLANE_CSC_POSTOFF_HI_1_A, \ + _PLANE_CSC_POSTOFF_HI_1_B) +#define _PLANE_CSC_POSTOFF_HI_2(pipe) _PIPE(pipe, _PLANE_CSC_POSTOFF_HI_2_A, \ + _PLANE_CSC_POSTOFF_HI_2_B) +#define PLANE_CSC_POSTOFF(pipe, plane, index) _MMIO_PLANE(plane, _PLANE_CSC_POSTOFF_HI_1(pipe) + \ + (index) * 4, _PLANE_CSC_POSTOFF_HI_2(pipe) + \ + (index) * 4) + /* pipe CSC & degamma/gamma LUTs on CHV */ #define _CGM_PIPE_A_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x67900) #define _CGM_PIPE_A_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x67904) -- cgit v1.2.3-70-g09d2 From 390cf1b28b11190121cb05d4cec1f86787b47668 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:50 -0700 Subject: drm/i915/pxp: add pxp debugfs 2 debugfs files, one to query the current status of the pxp session and one to trigger an invalidation for testing. v2: rename debugfs, fix date (Alan) v12: rebased to latest drm-tip (rename of files/structs from debugfs_gt to intel_debugfs_gt caused compiler errors). Signed-off-by: Daniele Ceraolo Spurio Reviewed-by : Alan Previn Reviewed-by: Alan Previn Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-16-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 2 + drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c | 78 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h | 21 ++++++++ 4 files changed, 102 insertions(+) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index dadf879b228c..b2a8406b5398 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -282,6 +282,7 @@ i915-y += i915_perf.o i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp.o \ pxp/intel_pxp_cmd.o \ + pxp/intel_pxp_debugfs.o \ pxp/intel_pxp_irq.o \ pxp/intel_pxp_pm.o \ pxp/intel_pxp_session.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index 03fb4aefbf90..1fe19ccd2794 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -10,6 +10,7 @@ #include "intel_gt_engines_debugfs.h" #include "intel_gt_pm_debugfs.h" #include "intel_sseu_debugfs.h" +#include "pxp/intel_pxp_debugfs.h" #include "uc/intel_uc_debugfs.h" void intel_gt_debugfs_register(struct intel_gt *gt) @@ -28,6 +29,7 @@ void intel_gt_debugfs_register(struct intel_gt *gt) intel_sseu_debugfs_register(gt, root); intel_uc_debugfs_register(>->uc, root); + intel_pxp_debugfs_register(>->pxp, root); } void intel_gt_debugfs_register_files(struct dentry *root, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c new file mode 100644 index 000000000000..10e1e45471f1 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include + +#include "gt/intel_gt_debugfs.h" +#include "pxp/intel_pxp.h" +#include "pxp/intel_pxp_irq.h" +#include "i915_drv.h" + +static int pxp_info_show(struct seq_file *m, void *data) +{ + struct intel_pxp *pxp = m->private; + struct drm_printer p = drm_seq_file_printer(m); + bool enabled = intel_pxp_is_enabled(pxp); + + if (!enabled) { + drm_printf(&p, "pxp disabled\n"); + return 0; + } + + drm_printf(&p, "active: %s\n", yesno(intel_pxp_is_active(pxp))); + drm_printf(&p, "instance counter: %u\n", pxp->key_instance); + + return 0; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(pxp_info); + +static int pxp_terminate_get(void *data, u64 *val) +{ + /* nothing to read */ + return -EPERM; +} + +static int pxp_terminate_set(void *data, u64 val) +{ + struct intel_pxp *pxp = data; + struct intel_gt *gt = pxp_to_gt(pxp); + + if (!intel_pxp_is_active(pxp)) + return -ENODEV; + + /* simulate a termination interrupt */ + spin_lock_irq(>->irq_lock); + intel_pxp_irq_handler(pxp, GEN12_DISPLAY_PXP_STATE_TERMINATED_INTERRUPT); + spin_unlock_irq(>->irq_lock); + + if (!wait_for_completion_timeout(&pxp->termination, + msecs_to_jiffies(100))) + return -ETIMEDOUT; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(pxp_terminate_fops, pxp_terminate_get, pxp_terminate_set, "%llx\n"); +void intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *gt_root) +{ + static const struct intel_gt_debugfs_file files[] = { + { "info", &pxp_info_fops, NULL }, + { "terminate_state", &pxp_terminate_fops, NULL }, + }; + struct dentry *root; + + if (!gt_root) + return; + + if (!HAS_PXP((pxp_to_gt(pxp)->i915))) + return; + + root = debugfs_create_dir("pxp", gt_root); + if (IS_ERR(root)) + return; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), pxp); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h new file mode 100644 index 000000000000..7e0c3d2f5d7e --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __INTEL_PXP_DEBUGFS_H__ +#define __INTEL_PXP_DEBUGFS_H__ + +struct intel_pxp; +struct dentry; + +#ifdef CONFIG_DRM_I915_PXP +void intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *root); +#else +static inline void +intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *root) +{ +} +#endif + +#endif /* __INTEL_PXP_DEBUGFS_H__ */ -- cgit v1.2.3-70-g09d2 From 2d5517a5c8bfcdc3a401d18a9d0cdf67de4fdcc7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:51 -0700 Subject: drm/i915/pxp: add PXP documentation Now that all the pieces are in place we can add a description of how the feature works. Also modify the comments in struct intel_pxp into kerneldoc. v2: improve doc (Rodrigo) Signed-off-by: Daniele Ceraolo Spurio Cc: Daniel Vetter Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-17-alan.previn.teres.alexis@intel.com --- Documentation/gpu/i915.rst | 8 +++++ drivers/gpu/drm/i915/pxp/intel_pxp.c | 28 ++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 47 ++++++++++++++++++++++-------- 3 files changed, 71 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e9b11044075e..a671fb2c1878 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -474,6 +474,14 @@ Object Tiling IOCTLs .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c :doc: buffer object tiling +Protected Objects +----------------- + +.. kernel-doc:: drivers/gpu/drm/i915/pxp/intel_pxp.c + :doc: PXP + +.. kernel-doc:: drivers/gpu/drm/i915/pxp/intel_pxp_types.h + Microcontrollers ================ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index b85b72f12726..e2314ad9546d 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -11,6 +11,34 @@ #include "gt/intel_context.h" #include "i915_drv.h" +/** + * DOC: PXP + * + * PXP (Protected Xe Path) is a feature available in Gen12 and newer platforms. + * It allows execution and flip to display of protected (i.e. encrypted) + * objects. The SW support is enabled via the CONFIG_DRM_I915_PXP kconfig. + * + * Objects can opt-in to PXP encryption at creation time via the + * I915_GEM_CREATE_EXT_PROTECTED_CONTENT create_ext flag. For objects to be + * correctly protected they must be used in conjunction with a context created + * with the I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. See the documentation + * of those two uapi flags for details and restrictions. + * + * Protected objects are tied to a pxp session; currently we only support one + * session, which i915 manages and whose index is available in the uapi + * (I915_PROTECTED_CONTENT_DEFAULT_SESSION) for use in instructions targeting + * protected objects. + * The session is invalidated by the HW when certain events occur (e.g. + * suspend/resume). When this happens, all the objects that were used with the + * session are marked as invalid and all contexts marked as using protected + * content are banned. Any further attempt at using them in an execbuf call is + * rejected, while flips are converted to black frames. + * + * Some of the PXP setup operations are performed by the Management Engine, + * which is handled by the mei driver; communication between i915 and mei is + * performed via the mei_pxp component module. + */ + struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) { return container_of(pxp, struct intel_gt, pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index ae24064bb57e..73ef7d1754e1 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -16,42 +16,65 @@ struct intel_context; struct i915_pxp_component; +/** + * struct intel_pxp - pxp state + */ struct intel_pxp { + /** + * @pxp_component: i915_pxp_component struct of the bound mei_pxp + * module. Only set and cleared inside component bind/unbind functions, + * which are protected by &tee_mutex. + */ struct i915_pxp_component *pxp_component; + /** + * @pxp_component_added: track if the pxp component has been added. + * Set and cleared in tee init and fini functions respectively. + */ bool pxp_component_added; + /** @ce: kernel-owned context used for PXP operations */ struct intel_context *ce; - /* + /** @arb_mutex: protects arb session start */ + struct mutex arb_mutex; + /** + * @arb_is_valid: tracks arb session status. * After a teardown, the arb session can still be in play on the HW * even if the keys are gone, so we can't rely on the HW state of the * session to know if it's valid and need to track the status in SW. */ - struct mutex arb_mutex; /* protects arb session start */ bool arb_is_valid; - /* - * Keep track of which key instance we're on, so we can use it to - * determine if an object was created using the current key or a + /** + * @key_instance: tracks which key instance we're on, so we can use it + * to determine if an object was created using the current key or a * previous one. */ u32 key_instance; - struct mutex tee_mutex; /* protects the tee channel binding */ + /** @tee_mutex: protects the tee channel binding and messaging. */ + struct mutex tee_mutex; - /* - * If the HW perceives an attack on the integrity of the encryption it - * will invalidate the keys and expect SW to re-initialize the session. - * We keep track of this state to make sure we only re-start the arb - * session when required. + /** + * @hw_state_invalidated: if the HW perceives an attack on the integrity + * of the encryption it will invalidate the keys and expect SW to + * re-initialize the session. We keep track of this state to make sure + * we only re-start the arb session when required. */ bool hw_state_invalidated; + /** @irq_enabled: tracks the status of the kcr irqs */ bool irq_enabled; + /** + * @termination: tracks the status of a pending termination. Only + * re-initialized under gt->irq_lock and completed in &session_work. + */ struct completion termination; + /** @session_work: worker that manages session events. */ struct work_struct session_work; - u32 session_events; /* protected with gt->irq_lock */ + /** @session_events: pending session events, protected with gt->irq_lock. */ + u32 session_events; #define PXP_TERMINATION_REQUEST BIT(0) #define PXP_TERMINATION_COMPLETE BIT(1) #define PXP_INVAL_REQUIRED BIT(2) -- cgit v1.2.3-70-g09d2 From 6f8e203897144e59de00ed910982af3d7c3e4a7f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Sep 2021 12:14:52 -0700 Subject: drm/i915/pxp: enable PXP for integrated Gen12 Note that discrete cards can support PXP as well, but we haven't tested on those yet so keeping it disabled for now. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210924191452.1539378-18-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index d1da31df276c..8adbd9ad7a46 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -865,6 +865,7 @@ static const struct intel_device_info jsl_info = { }, \ TGL_CURSOR_OFFSETS, \ .has_global_mocs = 1, \ + .has_pxp = 1, \ .display.has_dsb = 1 static const struct intel_device_info tgl_info = { @@ -891,6 +892,7 @@ static const struct intel_device_info rkl_info = { #define DGFX_FEATURES \ .memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \ .has_llc = 0, \ + .has_pxp = 0, \ .has_snoop = 1, \ .is_dgfx = 1 -- cgit v1.2.3-70-g09d2 From 4b2437f6f7b05ffcc7007f8e8d4f028ab86707dd Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 14 Sep 2021 12:49:45 -0700 Subject: drm/i915: Clean up disabled warnings i915 enables a wider set of warnings with '-Wall -Wextra' then disables several with cc-disable-warning. If an unknown flag gets added to KBUILD_CFLAGS when building with clang, all subsequent calls to cc-{disable-warning,option} will fail, meaning that all of these warnings do not get disabled [1]. A separate series will address the root cause of the issue by not adding these flags when building with clang [2]; however, the symptom of these extra warnings appearing can be addressed separately by just removing the calls to cc-disable-warning, which makes the build ever so slightly faster because the compiler does not need to be called as much before building. The following warnings are supported by GCC 4.9 and clang 10.0.1, which are the minimum supported versions of these compilers so the call to cc-disable-warning is not necessary. Masahiro cleaned this up for the reset of the kernel in commit 4c8dd95a723d ("kbuild: add some extra warning flags unconditionally"). * -Wmissing-field-initializers * -Wsign-compare * -Wtype-limits * -Wunused-parameter -Wunused-but-set-variable was implemented in clang 13.0.0 and -Wframe-address was implemented in clang 12.0.0 so the cc-disable-warning calls are kept for these two warnings. Lastly, -Winitializer-overrides is clang's version of -Woverride-init, which is disabled for the specific files that are problematic. clang added a compatibility alias in clang 8.0.0 so -Winitializer-overrides can be removed. [1]: https://lore.kernel.org/r/202108210311.CBtcgoUL-lkp@intel.com/ [2]: https://lore.kernel.org/r/20210824022640.2170859-1-nathan@kernel.org/ Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210914194944.4004260-1-nathan@kernel.org --- drivers/gpu/drm/i915/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b2a8406b5398..e1d6f551ab53 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -13,13 +13,11 @@ # will most likely get a sudden build breakage... Hopefully we will fix # new warnings before CI updates! subdir-ccflags-y := -Wall -Wextra -subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) -subdir-ccflags-y += $(call cc-disable-warning, type-limits) -subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) +subdir-ccflags-y += -Wno-unused-parameter +subdir-ccflags-y += -Wno-type-limits +subdir-ccflags-y += -Wno-missing-field-initializers +subdir-ccflags-y += -Wno-sign-compare subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) -# clang warnings -subdir-ccflags-y += $(call cc-disable-warning, sign-compare) -subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror -- cgit v1.2.3-70-g09d2 From 84edf53776343d6b5bf5fa59a6f600a22ca23c40 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 1 Oct 2021 08:58:25 -0700 Subject: drm/i915: Fix bug in user proto-context creation that leaked contexts Set number of engines before attempting to create contexts so the function free_engines can clean up properly. Also check return of alloc_engines for NULL. v2: (Tvrtko) - Send as stand alone patch (John Harrison) - Check for alloc_engines returning NULL v3: (Checkpatch / Tvrtko) - Remove braces around single line if statement Cc: Jason Ekstrand Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") Reviewed-by: Tvrtko Ursulin Signed-off-by: Matthew Brost Cc: Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20211001155825.6762-1-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 51861a66547c..74e33a4cdfe8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -941,6 +941,10 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, unsigned int n; e = alloc_engines(num_engines); + if (!e) + return ERR_PTR(-ENOMEM); + e->num_engines = num_engines; + for (n = 0; n < num_engines; n++) { struct intel_context *ce; int ret; @@ -974,7 +978,6 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, goto free_engines; } } - e->num_engines = num_engines; return e; -- cgit v1.2.3-70-g09d2 From 07f82a47e8a985ef939826ee8d75fe108c98126e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Oct 2021 12:31:35 +0100 Subject: drm/i915: Handle Intel igfx + Intel dgfx hybrid graphics setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In short this makes i915 work for hybrid setups (DRI_PRIME=1 with Mesa) when rendering is done on Intel dgfx and scanout/composition on Intel igfx. Before this patch the driver was not quite ready for that setup, mainly because it was able to emit a semaphore wait between the two GPUs, which results in deadlocks because semaphore target location in HWSP is neither shared between the two, nor mapped in both GGTT spaces. To fix it the patch adds an additional check to a couple of relevant code paths in order to prevent using semaphores for inter-engine synchronisation when relevant objects are not in the same GGTT space. v2: * Avoid adding rq->i915. (Chris) v3: * Use GGTT which describes the limit more precisely. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Cc: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20211005113135.768295-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_request.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 79da5eca60af..4f189982f67e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1145,6 +1145,12 @@ __emit_semaphore_wait(struct i915_request *to, return 0; } +static bool +can_use_semaphore_wait(struct i915_request *to, struct i915_request *from) +{ + return to->engine->gt->ggtt == from->engine->gt->ggtt; +} + static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, @@ -1153,6 +1159,9 @@ emit_semaphore_wait(struct i915_request *to, const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; struct i915_sw_fence *wait = &to->submit; + if (!can_use_semaphore_wait(to, from)) + goto await_fence; + if (!intel_context_use_semaphores(to->context)) goto await_fence; @@ -1256,7 +1265,8 @@ __i915_request_await_execution(struct i915_request *to, * immediate execution, and so we must wait until it reaches the * active slot. */ - if (intel_engine_has_semaphores(to->engine) && + if (can_use_semaphore_wait(to, from) && + intel_engine_has_semaphores(to->engine) && !i915_request_has_initial_breadcrumb(to)) { err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); if (err < 0) -- cgit v1.2.3-70-g09d2 From 1a839e016e4964b5c8384e5d82e5e5ac02a23f52 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Oct 2021 10:17:28 -0700 Subject: drm/i915: remove IS_ACTIVE When trying to bring IS_ACTIVE to linux/kconfig.h I thought it wouldn't provide much value just encapsulating it in a boolean context. So I also added the support for handling undefined macros as the IS_ENABLED() counterpart. However the feedback received from Masahiro Yamada was that it is too ugly, not providing much value. And just wrapping in a boolean context is too dumb - we could simply open code it. As detailed in commit babaab2f4738 ("drm/i915: Encapsulate kconfig constant values inside boolean predicates"), the IS_ACTIVE macro was added to workaround a compilation warning. However after checking again our current uses of IS_ACTIVE it turned out there is only 1 case in which it triggers a warning in clang (due -Wconstant-logical-operand) and 2 in smatch. All the others can simply use the shorter version, without wrapping it in any macro. So here I'm dialing all the way back to simply removing the macro. That single case hit by clang can be changed to make the constant come first, so it doesn't think it's mask: - if (context && CONFIG_DRM_I915_FENCE_TIMEOUT) + if (CONFIG_DRM_I915_FENCE_TIMEOUT && context) As talked with Dan Carpenter, that logic will be added in smatch as well, so it will also stop warning about it. Signed-off-by: Lucas De Marchi Reviewed-by: Jani Nikula Reviewed-by: Masahiro Yamada Link: https://patchwork.freedesktop.org/patch/msgid/20211005171728.3147094-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine.h | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 14 +++++++------- drivers/gpu/drm/i915/i915_config.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 13 ------------- 11 files changed, 18 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 74e33a4cdfe8..d225d3dd0b40 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -804,7 +804,7 @@ static int intel_context_set_gem(struct intel_context *ce, intel_engine_has_semaphores(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); - if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && + if (CONFIG_DRM_I915_REQUEST_TIMEOUT && ctx->i915->params.request_timeout_ms) { unsigned int timeout_ms = ctx->i915->params.request_timeout_ms; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 5130e8ed9564..65fc6ff5f59d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -395,7 +395,7 @@ retry: /* Track the mmo associated with the fenced vma */ vma->mmo = mmo; - if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)) + if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index eed4634c08cd..452248884ef1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -275,7 +275,7 @@ static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine) static inline bool intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return false; return intel_engine_has_preemption(engine); @@ -302,7 +302,7 @@ intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) static inline bool intel_engine_has_heartbeat(const struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return false; if (intel_engine_is_virtual(engine)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 74775ae961b2..a3698f611f45 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -207,7 +207,7 @@ out: void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return; next_heartbeat(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 5ae1207c363b..9167ce52487c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -556,7 +556,7 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) static inline bool intel_engine_has_timeslices(const struct intel_engine_cs *engine) { - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return false; return engine->flags & I915_ENGINE_HAS_TIMESLICES; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 7147fe80919e..73a79c2acd3a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3339,7 +3339,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_SEMAPHORES; if (can_preempt(engine)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; - if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (CONFIG_DRM_I915_TIMESLICE_DURATION) engine->flags |= I915_ENGINE_HAS_TIMESLICES; } } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 317eebf086c3..6e6e4d747cca 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -290,7 +290,7 @@ static int live_heartbeat_fast(void *arg) int err = 0; /* Check that the heartbeat ticks at the desired rate. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return 0; for_each_engine(engine, gt, id) { @@ -352,7 +352,7 @@ static int live_heartbeat_off(void *arg) int err = 0; /* Check that we can turn off heartbeat and not interrupt VIP */ - if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) return 0; for_each_engine(engine, gt, id) { diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index b3863abc51f5..25a8c4f62b0d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -992,7 +992,7 @@ static int live_timeslice_preempt(void *arg) * need to preempt the current task and replace it with another * ready task. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); @@ -1122,7 +1122,7 @@ static int live_timeslice_rewind(void *arg) * but only a few of those requests, forcing us to rewind the * RING_TAIL of the original request. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; for_each_engine(engine, gt, id) { @@ -1299,7 +1299,7 @@ static int live_timeslice_queue(void *arg) * ELSP[1] is already occupied, so must rely on timeslicing to * eject ELSP[0] in favour of the queue.) */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); @@ -1420,7 +1420,7 @@ static int live_timeslice_nopreempt(void *arg) * We should not timeslice into a request that is marked with * I915_REQUEST_NOPREEMPT. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0; if (igt_spinner_init(&spin, gt)) @@ -2260,7 +2260,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) int err; /* Preempt cancel non-preemptible spinner in ELSP0 */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return 0; if (!intel_has_reset_engine(arg->engine->gt)) @@ -2316,7 +2316,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) struct i915_request *rq; int err; - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return 0; if (!intel_has_reset_engine(engine->gt)) @@ -3375,7 +3375,7 @@ static int live_preempt_timeout(void *arg) * Check that we force preemption to occur by cancelling the previous * context if it refuses to yield the GPU. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) return 0; if (!intel_has_reset_engine(gt)) diff --git a/drivers/gpu/drm/i915/i915_config.c b/drivers/gpu/drm/i915/i915_config.c index b79b5f6d2cfa..afb828dab53b 100644 --- a/drivers/gpu/drm/i915/i915_config.c +++ b/drivers/gpu/drm/i915/i915_config.c @@ -8,7 +8,7 @@ unsigned long i915_fence_context_timeout(const struct drm_i915_private *i915, u64 context) { - if (context && IS_ACTIVE(CONFIG_DRM_I915_FENCE_TIMEOUT)) + if (CONFIG_DRM_I915_FENCE_TIMEOUT && context) return msecs_to_jiffies_timeout(CONFIG_DRM_I915_FENCE_TIMEOUT); return 0; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4f189982f67e..ed64fa9defdf 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1862,7 +1862,7 @@ long i915_request_wait(struct i915_request *rq, * completion. That requires having a good predictor for the request * duration, which we currently lack. */ - if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && + if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT && __i915_spin_request(rq, state)) goto out; diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 5259edacde38..62f189e064a9 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -458,17 +458,4 @@ static inline bool timer_expired(const struct timer_list *t) return timer_active(t) && !timer_pending(t); } -/* - * This is a lookalike for IS_ENABLED() that takes a kconfig value, - * e.g. CONFIG_DRM_I915_SPIN_REQUEST, and evaluates whether it is non-zero - * i.e. whether the configuration is active. Wrapping up the config inside - * a boolean context prevents clang and smatch from complaining about potential - * issues in confusing logical-&& with bitwise-& for constants. - * - * Sadly IS_ENABLED() itself does not work with kconfig values. - * - * Returns 0 if @config is 0, 1 if set to any value. - */ -#define IS_ACTIVE(config) ((config) != 0) - #endif /* !__I915_UTILS_H */ -- cgit v1.2.3-70-g09d2