diff options
author | Dave Airlie <airlied@redhat.com> | 2022-05-06 16:16:13 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-05-06 16:16:14 +1000 |
commit | af3847a7472d2def8358b7ae94b14f1d20fd8661 (patch) | |
tree | 7f091b64716f8ba49e46df46af3303471f162e36 /drivers/gpu/drm/i915 | |
parent | 97ab530870cc23bf20952ce8a1d86196dddc2e6e (diff) | |
parent | 1df1c79cbb7ac9bf148930be3418973c76ba8dde (diff) |
Merge tag 'drm-intel-gt-next-2022-05-05' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Add kerneldoc for engine class enum (Matt Roper)
- Add compute engine ABI (Matt Roper)
Driver Changes:
- Define GuC firmware version for DG2 (John Harrison)
- Clear SET_PREDICATE_RESULT prior to executing the ring (Chris Wilson)
- Fix race in __i915_vma_remove_closed (Karol Herbst)
- Add register for compute engine's MMIO-based TLB invalidation (Matt Roper)
- Xe_HP SDV and DG2 have up to 4 CCS engines (Daniele Ceraolo Spurio)
- Add initial Ponte Vecchio definitions (Stuart Summers)
- Document the eviction of the Flat-CCS objects (Ramalingam C)
- Use existing uncore helper to read gpm_timestamp (Umesh Nerlige Ramappa)
- Fix issue with LRI relative addressing (Akeem G Abodunrin)
- Skip poisoning SET_PREDICATE_RESULT on dg2 (Chris Wilson)
- Optimize the ccs_sz calculation per chunk (Ramalingam C)
- Remove superfluous string helper include (Jani Nikula)
- Fix assert in i915_ggtt_pin (Tvrtko Ursulin)
- Use IOMEM_ERR_PTR() directly (Kefeng Wang)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YnNxCm1pyflu3taj@tursulin-mobl2
Diffstat (limited to 'drivers/gpu/drm/i915')
23 files changed, 263 insertions, 89 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 9529c5455bc3..3e13960615bd 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -5,6 +5,7 @@ #include "gen8_engine_cs.h" #include "i915_drv.h" +#include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -385,6 +386,59 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } +static int __gen125_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags, + u32 arb) +{ + struct intel_context *ce = rq->context; + u32 wa_offset = lrc_indirect_bb(ce); + u32 *cs; + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | arb; + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)); + *cs++ = wa_offset + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + /* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */ + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = wa_offset + DG2_PREDICATE_RESULT_BB; + *cs++ = 0; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen125_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); +} + +int gen125_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); +} + int gen8_emit_bb_start_noarb(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index 107ab42539ab..32e3d2b831bb 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -31,6 +31,13 @@ int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags); +int gen125_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +int gen125_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); + u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 594a629cb28f..75a0c55c5aa5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -148,6 +148,7 @@ (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) /* gen12+ */ #define MI_PREDICATE_RESULT_2(base) _MMIO((base) + 0x3bc) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) @@ -193,6 +194,7 @@ #define RING_TIMESTAMP_UDW(base) _MMIO((base) + 0x358 + 4) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ +#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 0f6cd96b459f..46a174f8aa00 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = { [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, - /* TODO: Add COMPUTE_CLASS mapping once ABI is available */ + [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE, }; static int engine_cmp(void *priv, const struct list_head *A, diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index f8749c433b7c..86f7a9ac1c39 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3433,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } } - if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen8_emit_bb_start_noarb; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen125_emit_bb_start; + else + engine->emit_bb_start = gen125_emit_bb_start_noarb; + } else { + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; + } engine->busyness = execlists_engine_busyness; } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index e52718a87f14..556bca3be804 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -39,6 +39,8 @@ #define MI_GLOBAL_GTT (1<<22) #define MI_NOOP MI_INSTR(0, 0) +#define MI_SET_PREDICATE MI_INSTR(0x01, 0) +#define MI_SET_PREDICATE_DISABLE (0 << 0) #define MI_USER_INTERRUPT MI_INSTR(0x02, 0) #define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) #define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 92394f13b42f..53307ca0eed0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, }; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index a39718a40cc3..a0a49c16babd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1007,6 +1007,7 @@ #define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) #define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) #define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) +#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04) #define GEN12_MERT_MOD_CTRL _MMIO(0xcf28) #define RENDER_MOD_CTRL _MMIO(0xcf2c) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3f83a9038e13..eec73c66406c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -904,6 +904,24 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) engine->name); } +static u32 context_wa_bb_offset(const struct intel_context *ce) +{ + return PAGE_SIZE * ce->wa_bb_page; +} + +static u32 *context_indirect_bb(const struct intel_context *ce) +{ + void *ptr; + + GEM_BUG_ON(!ce->wa_bb_page); + + ptr = ce->lrc_reg_state; + ptr -= LRC_STATE_OFFSET; /* back to start of context image */ + ptr += context_wa_bb_offset(ce); + + return ptr; +} + void lrc_init_state(struct intel_context *ce, struct intel_engine_cs *engine, void *state) @@ -922,6 +940,10 @@ void lrc_init_state(struct intel_context *ce, /* Clear the ppHWSP (inc. per-context counters) */ memset(state, 0, PAGE_SIZE); + /* Clear the indirect wa and storage */ + if (ce->wa_bb_page) + memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE); + /* * The second page of the context object contains some registers which * must be set up prior to the first execution. @@ -929,6 +951,35 @@ void lrc_init_state(struct intel_context *ce, __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit); } +u32 lrc_indirect_bb(const struct intel_context *ce) +{ + return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce); +} + +static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs) +{ + /* If predication is active, this will be noop'ed */ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2); + *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + *cs++ = 0; /* No predication */ + + /* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */ + *cs++ = MI_BATCH_BUFFER_END | BIT(15); + *cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE; + + /* Instructions are no longer predicated (disabled), we can proceed */ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2); + *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + *cs++ = 1; /* enable predication before the next BB */ + + *cs++ = MI_BATCH_BUFFER_END; + GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA); + + return cs; +} + static struct i915_vma * __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -1240,24 +1291,6 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return cs; } -static u32 context_wa_bb_offset(const struct intel_context *ce) -{ - return PAGE_SIZE * ce->wa_bb_page; -} - -static u32 *context_indirect_bb(const struct intel_context *ce) -{ - void *ptr; - - GEM_BUG_ON(!ce->wa_bb_page); - - ptr = ce->lrc_reg_state; - ptr -= LRC_STATE_OFFSET; /* back to start of context image */ - ptr += context_wa_bb_offset(ce); - - return ptr; -} - static void setup_indirect_ctx_bb(const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -1271,9 +1304,11 @@ setup_indirect_ctx_bb(const struct intel_context *ce, while ((unsigned long)cs % CACHELINE_BYTES) *cs++ = MI_NOOP; + GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start)); + setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start)); + lrc_setup_indirect_ctx(ce->lrc_reg_state, engine, - i915_ggtt_offset(ce->state) + - context_wa_bb_offset(ce), + lrc_indirect_bb(ce), (cs - start) * sizeof(*cs)); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 7371bb5c8129..31be734010db 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -145,4 +145,9 @@ static inline void lrc_runtime_stop(struct intel_context *ce) WRITE_ONCE(stats->active, 0); } +#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(u64)) +#define DG2_PREDICATE_RESULT_BB (2048) + +u32 lrc_indirect_bb(const struct intel_context *ce); + #endif /* __INTEL_LRC_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 9d552f30b627..2c35324b5f68 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -485,16 +485,21 @@ static bool wa_1209644611_applies(int ver, u32 size) * And CCS data can be copied in and out of CCS region through * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly. * - * When we exhaust the lmem, if the object's placements support smem, then we can - * directly decompress the compressed lmem object into smem and start using it - * from smem itself. + * I915 supports Flat-CCS on lmem only objects. When an objects has smem in + * its preference list, on memory pressure, i915 needs to migrate the lmem + * content into smem. If the lmem object is Flat-CCS compressed by userspace, + * then i915 needs to decompress it. But I915 lack the required information + * for such decompression. Hence I915 supports Flat-CCS only on lmem only objects. * - * But when we need to swapout the compressed lmem object into a smem region - * though objects' placement doesn't support smem, then we copy the lmem content - * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT). - * When the object is referred, lmem content will be swaped in along with - * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding - * location. + * When we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can + * be temporarily evicted to smem, along with the auxiliary CCS state, where + * it can be potentially swapped-out at a later point, if required. + * If userspace later touches the evicted pages, then we always move + * the backing memory back to lmem, which includes restoring the saved CCS state, + * and potentially performing any required swap-in. + * + * For the migration of the lmem objects with smem in placement list, such as + * {lmem, smem}, objects are treated as non Flat-CCS capable objects. */ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) @@ -647,17 +652,9 @@ static int scatter_list_length(struct scatterlist *sg) static void calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, - int *src_sz, int *ccs_sz, u32 bytes_to_cpy, - u32 ccs_bytes_to_cpy) + int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) { if (ccs_bytes_to_cpy) { - /* - * We can only copy the ccs data corresponding to - * the CHUNK_SZ of lmem which is - * GET_CCS_BYTES(i915, CHUNK_SZ)) - */ - *ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ)); - if (!src_is_lmem) /* * When CHUNK_SZ is passed all the pages upto CHUNK_SZ @@ -707,10 +704,10 @@ intel_context_migrate_copy(struct intel_context *ce, struct drm_i915_private *i915 = ce->engine->i915; u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; enum i915_cache_level ccs_cache_level; - int src_sz, dst_sz, ccs_sz; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; + int src_sz, dst_sz; bool ccs_is_src; int err; @@ -791,7 +788,7 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz, + calculate_chunk_sz(i915, src_is_lmem, &src_sz, bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, @@ -825,37 +822,35 @@ intel_context_migrate_copy(struct intel_context *ce, bytes_to_cpy -= len; if (ccs_bytes_to_cpy) { + int ccs_sz; + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; + ccs_sz = GET_CCS_BYTES(i915, len); err = emit_pte(rq, &it_ccs, ccs_cache_level, false, ccs_is_src ? src_offset : dst_offset, ccs_sz); + if (err < 0) + goto out_rq; + if (err < ccs_sz) { + err = -EINVAL; + goto out_rq; + } err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; - /* - * Using max of src_sz and dst_sz, as we need to - * pass the lmem size corresponding to the ccs - * blocks we need to handle. - */ - ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz, - ccs_is_src ? dst_sz : ccs_sz); - err = emit_copy_ccs(rq, dst_offset, dst_access, - src_offset, src_access, ccs_sz); + src_offset, src_access, len); if (err) goto out_rq; err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; - - /* Converting back to ccs bytes */ - ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz); ccs_bytes_to_cpy -= ccs_sz; } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 9881a6790574..fdd25691beda 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -10,8 +10,6 @@ #include "intel_gt_regs.h" #include "intel_sseu.h" -#include "linux/string_helpers.h" - void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, u8 max_subslices, u8 max_eus_per_subslice) { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 6ba52ef1acb8..8b2c11dbe354 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -128,6 +128,27 @@ static int context_flush(struct intel_context *ce, long timeout) return err; } +static int get_lri_mask(struct intel_engine_cs *engine, u32 lri) +{ + if ((lri & MI_LRI_LRM_CS_MMIO) == 0) + return ~0u; + + if (GRAPHICS_VER(engine->i915) < 12) + return 0xfff; + + switch (engine->class) { + default: + case RENDER_CLASS: + case COMPUTE_CLASS: + return 0x07ff; + case COPY_ENGINE_CLASS: + return 0x0fff; + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + return 0x3fff; + } +} + static int live_lrc_layout(void *arg) { struct intel_gt *gt = arg; @@ -167,6 +188,7 @@ static int live_lrc_layout(void *arg) dw = 0; do { u32 lri = READ_ONCE(hw[dw]); + u32 lri_mask; if (lri == 0) { dw++; @@ -194,6 +216,18 @@ static int live_lrc_layout(void *arg) break; } + /* + * When bit 19 of MI_LOAD_REGISTER_IMM instruction + * opcode is set on Gen12+ devices, HW does not + * care about certain register address offsets, and + * instead check the following for valid address + * ranges on specific engines: + * RCS && CCS: BITS(0 - 10) + * BCS: BITS(0 - 11) + * VECS && VCS: BITS(0 - 13) + */ + lri_mask = get_lri_mask(engine, lri); + lri &= 0x7f; lri++; dw++; @@ -201,7 +235,7 @@ static int live_lrc_layout(void *arg) while (lri) { u32 offset = READ_ONCE(hw[dw]); - if (offset != lrc[dw]) { + if ((offset ^ lrc[dw]) & lri_mask) { pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", engine->name, dw, offset, lrc[dw]); err = -EINVAL; @@ -911,6 +945,19 @@ create_user_vma(struct i915_address_space *vm, unsigned long size) return vma; } +static u32 safe_poison(u32 offset, u32 poison) +{ + /* + * Do not enable predication as it will nop all subsequent commands, + * not only disabling the tests (by preventing all the other SRM) but + * also preventing the arbitration events at the end of the request. + */ + if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0))) + poison &= ~REG_BIT(0); + + return poison; +} + static struct i915_vma * store_context(struct intel_context *ce, struct i915_vma *scratch) { @@ -1120,7 +1167,9 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) *cs++ = MI_LOAD_REGISTER_IMM(len); while (len--) { *cs++ = hw[dw]; - *cs++ = poison; + *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine, + MI_LRI_LRM_CS_MMIO), + poison); dw += 2; } } while (dw < PAGE_SIZE / sizeof(u32) && diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 61a6f2424e24..75291e9846c5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1200,20 +1200,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt) return 3 - shift; } -static u64 gpm_timestamp(struct intel_gt *gt) -{ - u32 lo, hi, old_hi, loop = 0; - - hi = intel_uncore_read(gt->uncore, MISC_STATUS1); - do { - lo = intel_uncore_read(gt->uncore, MISC_STATUS0); - old_hi = hi; - hi = intel_uncore_read(gt->uncore, MISC_STATUS1); - } while (old_hi != hi && loop++ < 2); - - return ((u64)hi << 32) | lo; -} - static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) { struct intel_gt *gt = guc_to_gt(guc); @@ -1223,7 +1209,8 @@ static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); - gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; + gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, + MISC_STATUS1) >> guc->timestamp.shift; gt_stamp_lo = lower_32_bits(gpm_ts); *now = ktime_get(); @@ -3910,6 +3897,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) */ engine->emit_bb_start = gen8_emit_bb_start; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + engine->emit_bb_start = gen125_emit_bb_start; } static void rcs_submission_override(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index a876d39e6bcf..d078f884b5e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ + fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 475a6f824cad..18d38cb59923 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = { [I915_ENGINE_CLASS_COPY] = "copy", [I915_ENGINE_CLASS_VIDEO] = "video", [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance", + [I915_ENGINE_CLASS_COMPUTE] = "compute", }; static u64 busy_add(struct i915_gem_context *ctx, unsigned int class) diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 5f5b02b01ba0..f796c5e8e060 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -13,7 +13,7 @@ #include "gt/intel_engine_types.h" -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6cf9716d6aa..3ed9021c615d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1059,6 +1059,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) #define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) +#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO) + #define IS_DG2_G10(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 38f7de778914..987bdeb090a5 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1037,7 +1037,8 @@ static const struct intel_device_info xehpsdv_info = { BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) | - BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7), + BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7) | + BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3), .require_force_probe = 1, }; @@ -1056,7 +1057,8 @@ static const struct intel_device_info xehpsdv_info = { .platform_engine_mask = \ BIT(RCS0) | BIT(BCS0) | \ BIT(VECS0) | BIT(VECS1) | \ - BIT(VCS0) | BIT(VCS2) + BIT(VCS0) | BIT(VCS2) | \ + BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3) __maybe_unused static const struct intel_device_info dg2_info = { @@ -1074,6 +1076,27 @@ static const struct intel_device_info ats_m_info = { .require_force_probe = 1, }; +#define XE_HPC_FEATURES \ + XE_HP_FEATURES, \ + .dma_mask_size = 52 + +__maybe_unused +static const struct intel_device_info pvc_info = { + XE_HPC_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + .graphics.rel = 60, + .media.rel = 60, + PLATFORM(INTEL_PONTEVECCHIO), + .display = { 0 }, + .has_flat_ccs = 0, + .platform_engine_mask = + BIT(BCS0) | + BIT(VCS0) | + BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3), + .require_force_probe = 1, +}; + #undef PLATFORM /* diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 162e8d83691b..4f6db539571a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -548,7 +548,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) int err; if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY)) - return IO_ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); if (!i915_gem_object_is_lmem(vma->obj)) { if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { @@ -601,7 +601,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) err_unpin: __i915_vma_unpin(vma); err: - return IO_ERR_PTR(err); + return IOMEM_ERR_PTR(err); } void i915_vma_flush_writes(struct i915_vma *vma) @@ -1565,9 +1565,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (ww) return __i915_ggtt_pin(vma, ww, align, flags); -#ifdef CONFIG_LOCKDEP - WARN_ON(dma_resv_held(vma->obj->base.resv)); -#endif + lockdep_assert_not_held(&vma->obj->base.resv->lock.base); for_i915_gem_ww(&_ww, err, true) { err = i915_gem_object_lock(vma->obj, &_ww); @@ -1615,17 +1613,17 @@ void i915_vma_close(struct i915_vma *vma) static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; - - spin_lock_irq(>->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(>->closed_lock); } void i915_vma_reopen(struct i915_vma *vma) { + struct intel_gt *gt = vma->vm->gt; + + spin_lock_irq(>->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma); + spin_unlock_irq(>->closed_lock); } static void force_unbind(struct i915_vma *vma) @@ -1641,6 +1639,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1651,7 +1650,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy) spin_unlock(&obj->vma.lock); + spin_lock_irq(>->closed_lock); __i915_vma_remove_closed(vma); + spin_unlock_irq(>->closed_lock); if (vm_ddestroy) i915_vm_resv_put(vma->vm); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 6034991d89fe..88ca0bd9c900 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -317,7 +317,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node, * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) /** * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 41a5b98d1342..b0e62a411534 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -72,6 +72,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(ALDERLAKE_P), PLATFORM_NAME(XEHPSDV), PLATFORM_NAME(DG2), + PLATFORM_NAME(PONTEVECCHIO), }; #undef PLATFORM_NAME diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 576d15a04c9e..ec0b8095e7fa 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -88,6 +88,7 @@ enum intel_platform { INTEL_ALDERLAKE_P, INTEL_XEHPSDV, INTEL_DG2, + INTEL_PONTEVECCHIO, INTEL_MAX_PLATFORMS }; |