summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2023-06-09 16:43:35 +1000
committerDave Airlie <airlied@redhat.com>2023-06-09 16:43:36 +1000
commitba57b9b11f78530146f02b776854b2b6b6d344a4 (patch)
tree1a5d8dd39d83b511523d7598d295c8023ead56d0
parent959294e47953eafd1ddbeee362827f4a8aa07377 (diff)
parent24335848e543dc95c9e2ffa0108d879ffefd0442 (diff)
Merge tag 'drm-intel-gt-next-2023-06-08' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes: - I915_GEM_CREATE_EXT_SET_PAT for Mesa on Meteorlake. Driver Changes: Fixes/improvements/new stuff: - Use large rings for compute contexts (Chris Wilson) - Better logging/debug of unexpected GuC communication issues (Michal Wajdeczko) - Clear out entire reports after reading if not power of 2 size (Ashutosh Dixit) - Limit lmem allocation size to succeed on SmallBars (Andrzej Hajda) - perf/OA capture robustness improvements on DG2 (Umesh Nerlige Ramappa) - Fix error code in intel_gsc_uc_heci_cmd_submit_nonpriv() (Dan Carpenter) Future platform enablement: - Add workaround 14016712196 (Tejas Upadhyay) - HuC loading for MTL (Daniele Ceraolo Spurio) - Allow user to set cache at BO creation (Fei Yang) Miscellaneous: - Use system include style for drm headers (Jani Nikula) - Drop legacy CTB definitions (Michal Wajdeczko) - Turn off the timer to sample frequencies when GT is parked (Ashutosh Dixit) - Make PMU sample array two-dimensional (Ashutosh Dixit) - Use the correct error value when kernel_context() fails (Andi Shyti) - Fix second parameter type of pre-gen8 pte_encode callbacks (Nathan Chancellor) - Fix parameter in gmch_ggtt_insert_{entries, page}() (Nathan Chancellor) - Fix size_t format specifier in gsccs_send_message() (Nathan Chancellor) - Use the fdinfo helper (Tvrtko Ursulin) - Add some missing error propagation (Tvrtko Ursulin) - Reduce I915_MAX_GT to 2 (Matt Atwood) - Rename I915_PMU_MAX_GTS to I915_PMU_MAX_GT (Matt Atwood) - Remove some obsolete definitions (John Harrison) Merges: - Merge drm/drm-next into drm-intel-gt-next (Tvrtko Ursulin) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZIH09fqe5v5yArsu@tursulin-desk
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.c40
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c6
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c14
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c38
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h74
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c34
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c81
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h33
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c224
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h26
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c235
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_print.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c133
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h26
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h6
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c6
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.c65
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.h22
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h4
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c6
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c6
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c109
-rw-r--r--drivers/gpu/drm/i915/i915_perf_types.h12
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c34
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h8
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h3
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h17
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c4
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.c2
-rw-r--r--include/uapi/drm/i915_drm.h44
45 files changed, 1052 insertions, 438 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 47e845353ffa..2d21930d5501 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
config DRM_I915_DEBUG_GUC
bool "Enable additional driver debugging for GuC"
depends on DRM_I915
+ select STACKDEPOT
default n
help
Choose this option to turn on extra driver debugging that may affect
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5402a7bbcb1d..9a9ff84c90d7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce));
- ce->ring_size = SZ_16K;
+
+ if (ce->engine->class == COMPUTE_CLASS)
+ ce->ring_size = SZ_512K;
+ else
+ ce->ring_size = SZ_16K;
i915_vm_put(ce->vm);
ce->vm = i915_gem_context_get_eb_vm(ctx);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..d24c0ce8805c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+ unsigned int pat_index;
};
static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,43 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
return 0;
}
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+ struct create_ext *ext_data = data;
+ struct drm_i915_private *i915 = ext_data->i915;
+ struct drm_i915_gem_create_ext_set_pat ext;
+ unsigned int max_pat_index;
+
+ BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+ offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+ /* Limiting the extension only to Meteor Lake */
+ if (!IS_METEORLAKE(i915))
+ return -ENODEV;
+
+ if (copy_from_user(&ext, base, sizeof(ext)))
+ return -EFAULT;
+
+ max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+ if (ext.pat_index > max_pat_index) {
+ drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+ ext.pat_index);
+ return -EINVAL;
+ }
+
+ ext_data->pat_index = ext.pat_index;
+
+ return 0;
+}
+
static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+ [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
};
+#define PAT_INDEX_NOT_SET 0xffff
/**
* i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
@@ -418,6 +451,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
+ ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
create_extensions,
ARRAY_SIZE(create_extensions),
@@ -454,5 +488,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(obj))
return PTR_ERR(obj);
+ if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+ i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+ /* Mark pat_index is set by UMD */
+ obj->pat_set_by_user = true;
+ }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
return false;
/*
+ * Always flush cache for UMD objects at creation time.
+ */
+ if (obj->pat_set_by_user)
+ return true;
+
+ /*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
* kernel, as per the given object cache_level. This is troublesome
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index ad6a3b2fb387..7021b6e9b219 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -348,8 +348,10 @@ static int live_parallel_switch(void *arg)
continue;
ce = intel_context_create(data[m].ce[0]->engine);
- if (IS_ERR(ce))
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
goto out;
+ }
err = intel_context_pin(ce);
if (err) {
@@ -369,8 +371,10 @@ static int live_parallel_switch(void *arg)
worker = kthread_create_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name);
- if (IS_ERR(worker))
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
goto out;
+ }
data[n].worker = worker;
}
@@ -399,8 +403,10 @@ static int live_parallel_switch(void *arg)
}
}
- if (igt_live_test_end(&t))
- err = -EIO;
+ if (igt_live_test_end(&t)) {
+ err = err ?: -EIO;
+ break;
+ }
}
out:
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e1c76e5bfa82..23857cc08eca 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
return cs;
}
+static int mtl_dummy_pipe_control(struct i915_request *rq)
+{
+ /* Wa_14016712196 */
+ if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
+ IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
+ u32 *cs;
+
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+ cs = gen12_emit_pipe_control(cs,
+ 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+ LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(rq, cs);
+ }
+
+ return 0;
+}
+
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
if (mode & EMIT_FLUSH) {
u32 flags = 0;
+ int err;
u32 *cs;
+ err = mtl_dummy_pipe_control(rq);
+ if (err)
+ return err;
+
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE) {
u32 flags = 0;
u32 *cs, count;
+ int err;
+
+ err = mtl_dummy_pipe_control(rq);
+ if (err)
+ return err;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
+ /* Wa_14016712196 */
+ if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+ IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = gen12_emit_pipe_control(cs, 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
+
if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2a7942fac798..dd0ed941441a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
/*
* For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
- * so these PTE encode functions are left with using cache_level.
+ * so the switch-case statements in these PTE encode functions are still valid.
* See translation table LEGACY_CACHELEVEL.
*/
static u64 snb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_L3_LLC:
case I915_CACHE_LLC:
pte |= GEN6_PTE_CACHE_LLC;
@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
- MISSING_CASE(level);
+ MISSING_CASE(pat_index);
}
return pte;
}
static u64 ivb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_L3_LLC:
pte |= GEN7_PTE_CACHE_L3_LLC;
break;
@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
- MISSING_CASE(level);
+ MISSING_CASE(pat_index);
}
return pte;
}
static u64 byt_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
if (!(flags & PTE_READ_ONLY))
pte |= BYT_PTE_WRITEABLE;
- if (level != I915_CACHE_NONE)
+ if (pat_index != I915_CACHE_NONE)
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
return pte;
}
static u64 hsw_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- if (level != I915_CACHE_NONE)
+ if (pat_index != I915_CACHE_NONE)
pte |= HSW_WB_LLC_AGE3;
return pte;
}
static u64 iris_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_NONE:
break;
case I915_CACHE_WT:
@@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
ggtt->error_capture.size);
+ list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
+ intel_uc_resume_mappings(&gt->uc);
+
ggtt->invalidate(ggtt);
if (flush)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index d6a74ae2527b..866c416afb73 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -18,10 +18,10 @@
static void gmch_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 736b89a8ecf5..4202df5b8c12 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- int err = -ENOMEM;
u32 *map;
+ int err;
/*
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
*/
ctx_hi = kernel_context(gt->i915, NULL);
- if (!ctx_hi)
- return -ENOMEM;
+ if (IS_ERR(ctx_hi))
+ return PTR_ERR(ctx_hi);
+
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
- if (!ctx_lo)
+ if (IS_ERR(ctx_lo)) {
+ err = PTR_ERR(ctx_lo);
goto err_ctx_hi;
+ }
+
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 4493c8518e91..3bd6b540257b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -190,11 +190,18 @@ out:
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
+ struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
+ resource_size_t size = SZ_1G;
+
/*
* Allocation of largest possible page size allows to test all types
- * of pages.
+ * of pages. To succeed with both allocations, especially in case of Small
+ * BAR, try to allocate no more than quarter of mappable memory.
*/
- return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
+ if (mr && size > mr->io_size / 4)
+ size = mr->io_size / 4;
+
+ return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}
static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index 28b8387f97b7..f7d70db16d76 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
* - **flags**, holds various bits to control message handling
*/
-/*
- * Definition of the command transport message header (DW0)
- *
- * bit[4..0] message len (in dwords)
- * bit[7..5] reserved
- * bit[8] response (G2H only)
- * bit[8] write fence to desc (H2G only)
- * bit[9] write status to H2G buff (H2G only)
- * bit[10] send status back via G2H (H2G only)
- * bit[15..11] reserved
- * bit[31..16] action code
- */
-#define GUC_CT_MSG_LEN_SHIFT 0
-#define GUC_CT_MSG_LEN_MASK 0x1F
-#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
-#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
-#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
-#define GUC_CT_MSG_SEND_STATUS (1 << 10)
-#define GUC_CT_MSG_ACTION_SHIFT 16
-#define GUC_CT_MSG_ACTION_MASK 0xFFFF
-
#endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
index 7d5ba4d97d70..98eb4f46572b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
* | | 30:28 | **TYPE** - message type |
* | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
* | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
+ * | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
* | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
@@ -46,6 +47,7 @@
#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
#define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u
+#define GUC_HXG_TYPE_FAST_REQUEST 2u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
@@ -90,6 +92,34 @@
#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
+ * DOC: HXG Fast Request
+ *
+ * The `HXG Request`_ message should be used to initiate asynchronous activity
+ * for which confirmation or return data is not expected.
+ *
+ * If confirmation is required then `HXG Request`_ shall be used instead.
+ *
+ * The recipient of this message may only use `HXG Failure`_ message if it was
+ * unable to accept this request (like invalid data).
+ *
+ * Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN - see `HXG Message`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 - see `HXG Request`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION - see `HXG Request`_ |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| | DATAn - see `HXG Request`_ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+/**
* DOC: HXG Event
*
* The `HXG Event`_ message should be used to initiate asynchronous activity
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
new file mode 100644
index 000000000000..714f0c256118
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_GSC_BINARY_HEADERS_H_
+#define _INTEL_GSC_BINARY_HEADERS_H_
+
+#include <linux/types.h>
+
+/* Code partition directory (CPD) structures */
+struct intel_gsc_cpd_header_v2 {
+ u32 header_marker;
+#define INTEL_GSC_CPD_HEADER_MARKER 0x44504324
+
+ u32 num_of_entries;
+ u8 header_version;
+ u8 entry_version;
+ u8 header_length; /* in bytes */
+ u8 flags;
+ u32 partition_name;
+ u32 crc32;
+} __packed;
+
+struct intel_gsc_cpd_entry {
+ u8 name[12];
+
+ /*
+ * Bits 0-24: offset from the beginning of the code partition
+ * Bit 25: huffman compressed
+ * Bits 26-31: reserved
+ */
+ u32 offset;
+#define INTEL_GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
+#define INTEL_GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
+
+ /*
+ * Module/Item length, in bytes. For Huffman-compressed modules, this
+ * refers to the uncompressed size. For software-compressed modules,
+ * this refers to the compressed size.
+ */
+ u32 length;
+
+ u8 reserved[4];
+} __packed;
+
+struct intel_gsc_version {
+ u16 major;
+ u16 minor;
+ u16 hotfix;
+ u16 build;
+} __packed;
+
+struct intel_gsc_manifest_header {
+ u32 header_type; /* 0x4 for manifest type */
+ u32 header_length; /* in dwords */
+ u32 header_version;
+ u32 flags;
+ u32 vendor;
+ u32 date;
+ u32 size; /* In dwords, size of entire manifest (header + extensions) */
+ u32 header_id;
+ u32 internal_data;
+ struct intel_gsc_version fw_version;
+ u32 security_version;
+ struct intel_gsc_version meu_kit_version;
+ u32 meu_manifest_version;
+ u8 general_data[4];
+ u8 reserved3[56];
+ u32 modulus_size; /* in dwords */
+ u32 exponent_size; /* in dwords */
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index ebee0b5a2c1d..5f138de3c14f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -5,8 +5,8 @@
#include <linux/component.h>
-#include "drm/i915_component.h"
-#include "drm/i915_gsc_proxy_mei_interface.h"
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
index fb0984f875f9..c659cc01f32f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
@@ -29,13 +29,32 @@ static void gsc_work(struct work_struct *work)
if (actions & GSC_ACTION_FW_LOAD) {
ret = intel_gsc_uc_fw_upload(gsc);
- if (ret == -EEXIST) /* skip proxy if not a new load */
- actions &= ~GSC_ACTION_FW_LOAD;
- else if (ret)
+ if (!ret)
+ /* setup proxy on a new load */
+ actions |= GSC_ACTION_SW_PROXY;
+ else if (ret != -EEXIST)
goto out_put;
+
+ /*
+ * The HuC auth can be done both before or after the proxy init;
+ * if done after, a proxy request will be issued and must be
+ * serviced before the authentication can complete.
+ * Since this worker also handles proxy requests, we can't
+ * perform an action that requires the proxy from within it and
+ * then stall waiting for it, because we'd be blocking the
+ * service path. Therefore, it is easier for us to load HuC
+ * first and do proxy later. The GSC will ack the HuC auth and
+ * then send the HuC proxy request as part of the proxy init
+ * flow.
+ * Note that we can only do the GSC auth if the GuC auth was
+ * successful.
+ */
+ if (intel_uc_uses_huc(&gt->uc) &&
+ intel_huc_is_authenticated(&gt->uc.huc, INTEL_HUC_AUTH_BY_GUC))
+ intel_huc_auth(&gt->uc.huc, INTEL_HUC_AUTH_BY_GSC);
}
- if (actions & (GSC_ACTION_FW_LOAD | GSC_ACTION_SW_PROXY)) {
+ if (actions & GSC_ACTION_SW_PROXY) {
if (!intel_gsc_uc_fw_init_done(gsc)) {
gt_err(gt, "Proxy request received with GSC not loaded!\n");
goto out_put;
@@ -90,7 +109,12 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
- intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC);
+ /*
+ * GSC FW needs to be copied to a dedicated memory allocations for
+ * loading (see gsc->local), so we don't need to GGTT map the FW image
+ * itself into GGTT.
+ */
+ intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC, false);
INIT_WORK(&gsc->work, gsc_work);
/* we can arrive here from i915_driver_early_probe for primary
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
index 579c0f5a1438..89ed5ee9cded 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
@@ -99,7 +99,7 @@ void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header,
u64 host_session_id)
{
host_session_id &= ~HOST_SESSION_MASK;
- if (heci_client_id == HECI_MEADDRESS_PXP)
+ if (host_session_id && heci_client_id == HECI_MEADDRESS_PXP)
host_session_id |= HOST_SESSION_PXP_SINGLE;
header->validity_marker = GSC_HECI_VALIDITY_MARKER;
@@ -202,7 +202,7 @@ out_ww:
if (++trials < 10)
goto retry;
else
- err = EAGAIN;
+ err = -EAGAIN;
}
}
i915_gem_ww_ctx_fini(&ww);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index c9f20385f6a0..2eb891b270ae 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -164,7 +164,7 @@ void intel_guc_init_early(struct intel_guc *guc)
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
- intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
+ intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, true);
intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a22e33f37cae..f28a3a83742d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
}
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
+{
+ unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ unsigned long entries[SZ_32];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+ /* May be called under spinlock, so avoid sleeping */
+ ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+ ct->requests.lost_and_found[lost].fence = fence;
+ ct->requests.lost_and_found[lost].action = action;
+}
+#endif
+
static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct,
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
- type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT :
+ type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
GUC_HXG_TYPE_REQUEST;
hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
- FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
- GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
+ GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
}
GEM_BUG_ON(tail > size);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ ct_track_lost_and_found(ct, fence,
+ FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+#endif
+
/*
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct,
GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len);
- GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+ GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
GEM_BUG_ON(!response_buf && response_buf_size);
might_sleep();
@@ -953,6 +976,43 @@ corrupted:
return -EPIPE;
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+ unsigned int n;
+ char *buf = NULL;
+ bool found = false;
+
+ lockdep_assert_held(&ct->requests.lock);
+
+ for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
+ if (ct->requests.lost_and_found[n].fence != fence)
+ continue;
+ found = true;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ buf = kmalloc(SZ_4K, GFP_NOWAIT);
+ if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
+ buf, SZ_4K, 0)) {
+ CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
+ fence, ct->requests.lost_and_found[n].action, buf);
+ break;
+ }
+#endif
+ CT_ERROR(ct, "Fence %u was used by action %#04x\n",
+ fence, ct->requests.lost_and_found[n].action);
+ break;
+ }
+ kfree(buf);
+ return found;
+}
+#else
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+ return false;
+}
+#endif
+
static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
{
u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
break;
}
if (!found) {
- CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
- CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
- ct->requests.last_fence);
- list_for_each_entry(req, &ct->requests.pending, link)
- CT_ERROR(ct, "request %u awaits response\n",
- req->fence);
+ CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
+ len, hxg[0], fence, ct->requests.last_fence);
+ if (!ct_check_lost_and_found(ct, fence)) {
+ list_for_each_entry(req, &ct->requests.pending, link)
+ CT_ERROR(ct, "request %u awaits response\n",
+ req->fence);
+ }
err = -ENOKEY;
}
spin_unlock_irqrestore(&ct->requests.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 818415b64f4d..58e42901ff49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -8,6 +8,7 @@
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
#include <linux/workqueue.h>
#include <linux/ktime.h>
#include <linux/wait.h>
@@ -81,6 +82,16 @@ struct intel_guc_ct {
struct list_head incoming; /* incoming requests */
struct work_struct worker; /* handler for incoming requests */
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ struct {
+ u16 fence;
+ u16 action;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ depot_stack_handle_t stack;
+#endif
+ } lost_and_found[SZ_16];
+#endif
} requests;
/** @stall_time: time of first time a CTB submission is stalled */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 4e57bd09d50d..b4d56eccfb1f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -35,13 +35,6 @@
#define GUC_MAX_CONTEXT_ID 65535
#define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID
-#define GUC_RENDER_ENGINE 0
-#define GUC_VIDEO_ENGINE 1
-#define GUC_BLITTER_ENGINE 2
-#define GUC_VIDEOENHANCE_ENGINE 3
-#define GUC_VIDEO_ENGINE2 4
-#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
-
#define GUC_RENDER_CLASS 0
#define GUC_VIDEO_CLASS 1
#define GUC_VIDEOENHANCE_CLASS 2
@@ -499,32 +492,6 @@ struct guc_log_buffer_state {
u32 version;
} __packed;
-struct guc_ctx_report {
- u32 report_return_status;
- u32 reserved1[64];
- u32 affected_count;
- u32 reserved2[2];
-} __packed;
-
-/* GuC Shared Context Data Struct */
-struct guc_shared_ctx_data {
- u32 addr_of_last_preempted_data_low;
- u32 addr_of_last_preempted_data_high;
- u32 addr_of_last_preempted_data_high_tmp;
- u32 padding;
- u32 is_mapped_to_proxy;
- u32 proxy_ctx_id;
- u32 engine_reset_ctx_id;
- u32 media_reset_count;
- u32 reserved1[8];
- u32 uk_last_ctx_switch_reason;
- u32 was_reset;
- u32 lrca_gpu_addr;
- u64 execlist_ctx;
- u32 reserved2[66];
- struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM];
-} __packed;
-
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum intel_guc_recv_message {
INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 04724ff56ded..e0afd8f89502 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -6,23 +6,16 @@
#include <linux/types.h>
#include "gt/intel_gt.h"
-#include "gt/intel_gt_print.h"
#include "intel_guc_reg.h"
#include "intel_huc.h"
+#include "intel_huc_print.h"
#include "i915_drv.h"
+#include "i915_reg.h"
+#include "pxp/intel_pxp_cmd_interface_43.h"
#include <linux/device/bus.h>
#include <linux/mei_aux.h>
-#define huc_printk(_huc, _level, _fmt, ...) \
- gt_##_level(huc_to_gt(_huc), "HuC: " _fmt, ##__VA_ARGS__)
-#define huc_err(_huc, _fmt, ...) huc_printk((_huc), err, _fmt, ##__VA_ARGS__)
-#define huc_warn(_huc, _fmt, ...) huc_printk((_huc), warn, _fmt, ##__VA_ARGS__)
-#define huc_notice(_huc, _fmt, ...) huc_printk((_huc), notice, _fmt, ##__VA_ARGS__)
-#define huc_info(_huc, _fmt, ...) huc_printk((_huc), info, _fmt, ##__VA_ARGS__)
-#define huc_dbg(_huc, _fmt, ...) huc_printk((_huc), dbg, _fmt, ##__VA_ARGS__)
-#define huc_probe_error(_huc, _fmt, ...) huc_printk((_huc), probe_error, _fmt, ##__VA_ARGS__)
-
/**
* DOC: HuC
*
@@ -31,15 +24,23 @@
* capabilities by adding HuC specific commands to batch buffers.
*
* The kernel driver is only responsible for loading the HuC firmware and
- * triggering its security authentication, which is performed by the GuC on
- * older platforms and by the GSC on newer ones. For the GuC to correctly
- * perform the authentication, the HuC binary must be loaded before the GuC one.
+ * triggering its security authentication. This is done differently depending
+ * on the platform:
+ * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
+ * and the authentication via GuC
+ * - DG2: load and authentication are both performed via GSC.
+ * - MTL and newer platforms: the load is performed via DMA (same as with
+ * not-DG2 older platforms), while the authentication is done in 2-steps,
+ * a first auth for clear-media workloads via GuC and a second one for all
+ * workloads via GSC.
+ * On platforms where the GuC does the authentication, to correctly do so the
+ * HuC binary must be loaded before the GuC one.
* Loading the HuC is optional; however, not using the HuC might negatively
* impact power usage and/or performance of media workloads, depending on the
* use-cases.
* HuC must be reloaded on events that cause the WOPCM to lose its contents
- * (S3/S4, FLR); GuC-authenticated HuC must also be reloaded on GuC/GT reset,
- * while GSC-managed HuC will survive that.
+ * (S3/S4, FLR); on older platforms the HuC must also be reloaded on GuC/GT
+ * reset, while on newer ones it will survive that.
*
* See https://github.com/intel/media-driver for the latest details on HuC
* functionality.
@@ -115,7 +116,7 @@ static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrti
{
struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer);
- if (!intel_huc_is_authenticated(huc)) {
+ if (!intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC)) {
if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC)
huc_notice(huc, "timed out waiting for MEI GSC\n");
else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP)
@@ -133,7 +134,7 @@ static void huc_delayed_load_start(struct intel_huc *huc)
{
ktime_t delay;
- GEM_BUG_ON(intel_huc_is_authenticated(huc));
+ GEM_BUG_ON(intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC));
/*
* On resume we don't have to wait for MEI-GSC to be re-probed, but we
@@ -276,7 +277,7 @@ void intel_huc_init_early(struct intel_huc *huc)
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
struct intel_gt *gt = huc_to_gt(huc);
- intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
+ intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, true);
/*
* we always init the fence as already completed, even if HuC is not
@@ -293,13 +294,23 @@ void intel_huc_init_early(struct intel_huc *huc)
}
if (GRAPHICS_VER(i915) >= 11) {
- huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
- huc->status.mask = HUC_LOAD_SUCCESSFUL;
- huc->status.value = HUC_LOAD_SUCCESSFUL;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].reg = GEN11_HUC_KERNEL_LOAD_INFO;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].mask = HUC_LOAD_SUCCESSFUL;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].value = HUC_LOAD_SUCCESSFUL;
+ } else {
+ huc->status[INTEL_HUC_AUTH_BY_GUC].reg = HUC_STATUS2;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].mask = HUC_FW_VERIFIED;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].value = HUC_FW_VERIFIED;
+ }
+
+ if (IS_DG2(i915)) {
+ huc->status[INTEL_HUC_AUTH_BY_GSC].reg = GEN11_HUC_KERNEL_LOAD_INFO;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HUC_LOAD_SUCCESSFUL;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].value = HUC_LOAD_SUCCESSFUL;
} else {
- huc->status.reg = HUC_STATUS2;
- huc->status.mask = HUC_FW_VERIFIED;
- huc->status.value = HUC_FW_VERIFIED;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].reg = HECI_FWSTS5(MTL_GSC_HECI1_BASE);
+ huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HECI_FWSTS5_HUC_AUTH_DONE;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].value = HECI_FWSTS5_HUC_AUTH_DONE;
}
}
@@ -307,51 +318,90 @@ void intel_huc_init_early(struct intel_huc *huc)
static int check_huc_loading_mode(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
- bool fw_needs_gsc = intel_huc_is_loaded_by_gsc(huc);
- bool hw_uses_gsc = false;
+ bool gsc_enabled = huc->fw.has_gsc_headers;
/*
* The fuse for HuC load via GSC is only valid on platforms that have
* GuC deprivilege.
*/
if (HAS_GUC_DEPRIVILEGE(gt->i915))
- hw_uses_gsc = intel_uncore_read(gt->uncore, GUC_SHIM_CONTROL2) &
- GSC_LOADS_HUC;
+ huc->loaded_via_gsc = intel_uncore_read(gt->uncore, GUC_SHIM_CONTROL2) &
+ GSC_LOADS_HUC;
- if (fw_needs_gsc != hw_uses_gsc) {
- huc_err(huc, "mismatch between FW (%s) and HW (%s) load modes\n",
- HUC_LOAD_MODE_STRING(fw_needs_gsc), HUC_LOAD_MODE_STRING(hw_uses_gsc));
+ if (huc->loaded_via_gsc && !gsc_enabled) {
+ huc_err(huc, "HW requires a GSC-enabled blob, but we found a legacy one\n");
return -ENOEXEC;
}
- /* make sure we can access the GSC via the mei driver if we need it */
- if (!(IS_ENABLED(CONFIG_INTEL_MEI_PXP) && IS_ENABLED(CONFIG_INTEL_MEI_GSC)) &&
- fw_needs_gsc) {
- huc_info(huc, "can't load due to missing MEI modules\n");
- return -EIO;
+ /*
+ * On newer platforms we have GSC-enabled binaries but we load the HuC
+ * via DMA. To do so we need to find the location of the legacy-style
+ * binary inside the GSC-enabled one, which we do at fetch time. Make
+ * sure that we were able to do so if the fuse says we need to load via
+ * DMA and the binary is GSC-enabled.
+ */
+ if (!huc->loaded_via_gsc && gsc_enabled && !huc->fw.dma_start_offset) {
+ huc_err(huc, "HW in DMA mode, but we have an incompatible GSC-enabled blob\n");
+ return -ENOEXEC;
+ }
+
+ /*
+ * If the HuC is loaded via GSC, we need to be able to access the GSC.
+ * On DG2 this is done via the mei components, while on newer platforms
+ * it is done via the GSCCS,
+ */
+ if (huc->loaded_via_gsc) {
+ if (IS_DG2(gt->i915)) {
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_PXP) ||
+ !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) {
+ huc_info(huc, "can't load due to missing mei modules\n");
+ return -EIO;
+ }
+ } else {
+ if (!HAS_ENGINE(gt, GSC0)) {
+ huc_info(huc, "can't load due to missing GSCCS\n");
+ return -EIO;
+ }
+ }
}
- huc_dbg(huc, "loaded by GSC = %s\n", str_yes_no(fw_needs_gsc));
+ huc_dbg(huc, "loaded by GSC = %s\n", str_yes_no(huc->loaded_via_gsc));
return 0;
}
int intel_huc_init(struct intel_huc *huc)
{
+ struct intel_gt *gt = huc_to_gt(huc);
int err;
err = check_huc_loading_mode(huc);
if (err)
goto out;
+ if (HAS_ENGINE(gt, GSC0)) {
+ struct i915_vma *vma;
+
+ vma = intel_guc_allocate_vma(&gt->uc.guc, PXP43_HUC_AUTH_INOUT_SIZE * 2);
+ if (IS_ERR(vma)) {
+ huc_info(huc, "Failed to allocate heci pkt\n");
+ goto out;
+ }
+
+ huc->heci_pkt = vma;
+ }
+
err = intel_uc_fw_init(&huc->fw);
if (err)
- goto out;
+ goto out_pkt;
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
return 0;
+out_pkt:
+ if (huc->heci_pkt)
+ i915_vma_unpin_and_release(&huc->heci_pkt, 0);
out:
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
huc_info(huc, "initialization failed %pe\n", ERR_PTR(err));
@@ -366,6 +416,9 @@ void intel_huc_fini(struct intel_huc *huc)
*/
delayed_huc_load_fini(huc);
+ if (huc->heci_pkt)
+ i915_vma_unpin_and_release(&huc->heci_pkt, 0);
+
if (intel_uc_fw_is_loadable(&huc->fw))
intel_uc_fw_fini(&huc->fw);
}
@@ -383,34 +436,45 @@ void intel_huc_suspend(struct intel_huc *huc)
delayed_huc_load_complete(huc);
}
-int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
+static const char *auth_mode_string(struct intel_huc *huc,
+ enum intel_huc_authentication_type type)
+{
+ bool partial = huc->fw.has_gsc_headers && type == INTEL_HUC_AUTH_BY_GUC;
+
+ return partial ? "clear media" : "all workloads";
+}
+
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
+ enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
int ret;
ret = __intel_wait_for_register(gt->uncore,
- huc->status.reg,
- huc->status.mask,
- huc->status.value,
+ huc->status[type].reg,
+ huc->status[type].mask,
+ huc->status[type].value,
2, 50, NULL);
/* mark the load process as complete even if the wait failed */
delayed_huc_load_complete(huc);
if (ret) {
- huc_err(huc, "firmware not verified %pe\n", ERR_PTR(ret));
+ huc_err(huc, "firmware not verified for %s: %pe\n",
+ auth_mode_string(huc, type), ERR_PTR(ret));
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
- huc_info(huc, "authenticated!\n");
+ huc_info(huc, "authenticated for %s\n", auth_mode_string(huc, type));
return 0;
}
/**
* intel_huc_auth() - Authenticate HuC uCode
* @huc: intel_huc structure
+ * @type: authentication type (via GuC or via GSC)
*
* Called after HuC and GuC firmware loading during intel_uc_init_hw().
*
@@ -418,7 +482,7 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
* passing the offset of the RSA signature to intel_guc_auth_huc(). It then
* waits for up to 50ms for firmware verification ACK.
*/
-int intel_huc_auth(struct intel_huc *huc)
+int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
struct intel_guc *guc = &gt->uc.guc;
@@ -427,44 +491,67 @@ int intel_huc_auth(struct intel_huc *huc)
if (!intel_uc_fw_is_loaded(&huc->fw))
return -ENOEXEC;
- /* GSC will do the auth */
+ /* GSC will do the auth with the load */
if (intel_huc_is_loaded_by_gsc(huc))
return -ENODEV;
+ if (intel_huc_is_authenticated(huc, type))
+ return -EEXIST;
+
ret = i915_inject_probe_error(gt->i915, -ENXIO);
if (ret)
goto fail;
- GEM_BUG_ON(intel_uc_fw_is_running(&huc->fw));
-
- ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
- if (ret) {
- huc_err(huc, "authentication by GuC failed %pe\n", ERR_PTR(ret));
- goto fail;
+ switch (type) {
+ case INTEL_HUC_AUTH_BY_GUC:
+ ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
+ break;
+ case INTEL_HUC_AUTH_BY_GSC:
+ ret = intel_huc_fw_auth_via_gsccs(huc);
+ break;
+ default:
+ MISSING_CASE(type);
+ ret = -EINVAL;
}
+ if (ret)
+ goto fail;
/* Check authentication status, it should be done by now */
- ret = intel_huc_wait_for_auth_complete(huc);
+ ret = intel_huc_wait_for_auth_complete(huc, type);
if (ret)
goto fail;
return 0;
fail:
- huc_probe_error(huc, "authentication failed %pe\n", ERR_PTR(ret));
+ huc_probe_error(huc, "%s authentication failed %pe\n",
+ auth_mode_string(huc, type), ERR_PTR(ret));
return ret;
}
-bool intel_huc_is_authenticated(struct intel_huc *huc)
+bool intel_huc_is_authenticated(struct intel_huc *huc,
+ enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
intel_wakeref_t wakeref;
u32 status = 0;
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- status = intel_uncore_read(gt->uncore, huc->status.reg);
+ status = intel_uncore_read(gt->uncore, huc->status[type].reg);
+
+ return (status & huc->status[type].mask) == huc->status[type].value;
+}
+
+static bool huc_is_fully_authenticated(struct intel_huc *huc)
+{
+ struct intel_uc_fw *huc_fw = &huc->fw;
- return (status & huc->status.mask) == huc->status.value;
+ if (!huc_fw->has_gsc_headers)
+ return intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GUC);
+ else if (intel_huc_is_loaded_by_gsc(huc) || HAS_ENGINE(huc_to_gt(huc), GSC0))
+ return intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC);
+ else
+ return false;
}
/**
@@ -479,7 +566,9 @@ bool intel_huc_is_authenticated(struct intel_huc *huc)
*/
int intel_huc_check_status(struct intel_huc *huc)
{
- switch (__intel_uc_fw_status(&huc->fw)) {
+ struct intel_uc_fw *huc_fw = &huc->fw;
+
+ switch (__intel_uc_fw_status(huc_fw)) {
case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
case INTEL_UC_FIRMWARE_DISABLED:
@@ -496,7 +585,17 @@ int intel_huc_check_status(struct intel_huc *huc)
break;
}
- return intel_huc_is_authenticated(huc);
+ /*
+ * GSC-enabled binaries loaded via DMA are first partially
+ * authenticated by GuC and then fully authenticated by GSC
+ */
+ if (huc_is_fully_authenticated(huc))
+ return 1; /* full auth */
+ else if (huc_fw->has_gsc_headers && !intel_huc_is_loaded_by_gsc(huc) &&
+ intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GUC))
+ return 2; /* clear media only */
+ else
+ return 0;
}
static bool huc_has_delayed_load(struct intel_huc *huc)
@@ -510,7 +609,10 @@ void intel_huc_update_auth_status(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
- if (intel_huc_is_authenticated(huc))
+ if (!huc->fw.has_gsc_headers)
+ return;
+
+ if (huc_is_fully_authenticated(huc))
intel_uc_fw_change_status(&huc->fw,
INTEL_UC_FIRMWARE_RUNNING);
else if (huc_has_delayed_load(huc))
@@ -543,5 +645,5 @@ void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p)
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
drm_printf(p, "HuC status: 0x%08x\n",
- intel_uncore_read(gt->uncore, huc->status.reg));
+ intel_uncore_read(gt->uncore, huc->status[INTEL_HUC_AUTH_BY_GUC].reg));
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 0789184d81a2..ba5cb08e9e7b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
struct bus_type;
+struct i915_vma;
enum intel_huc_delayed_load_status {
INTEL_HUC_WAITING_ON_GSC = 0,
@@ -22,6 +23,12 @@ enum intel_huc_delayed_load_status {
INTEL_HUC_DELAYED_LOAD_ERROR,
};
+enum intel_huc_authentication_type {
+ INTEL_HUC_AUTH_BY_GUC = 0,
+ INTEL_HUC_AUTH_BY_GSC,
+ INTEL_HUC_AUTH_MAX_MODES
+};
+
struct intel_huc {
/* Generic uC firmware management */
struct intel_uc_fw fw;
@@ -31,7 +38,7 @@ struct intel_huc {
i915_reg_t reg;
u32 mask;
u32 value;
- } status;
+ } status[INTEL_HUC_AUTH_MAX_MODES];
struct {
struct i915_sw_fence fence;
@@ -39,6 +46,11 @@ struct intel_huc {
struct notifier_block nb;
enum intel_huc_delayed_load_status status;
} delayed_load;
+
+ /* for load via GSCCS */
+ struct i915_vma *heci_pkt;
+
+ bool loaded_via_gsc;
};
int intel_huc_sanitize(struct intel_huc *huc);
@@ -46,11 +58,13 @@ void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
void intel_huc_suspend(struct intel_huc *huc);
-int intel_huc_auth(struct intel_huc *huc);
-int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
+int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
+ enum intel_huc_authentication_type type);
+bool intel_huc_is_authenticated(struct intel_huc *huc,
+ enum intel_huc_authentication_type type);
int intel_huc_check_status(struct intel_huc *huc);
void intel_huc_update_auth_status(struct intel_huc *huc);
-bool intel_huc_is_authenticated(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
@@ -73,13 +87,13 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
{
- return huc->fw.loaded_via_gsc;
+ return huc->loaded_via_gsc;
}
static inline bool intel_huc_wait_required(struct intel_huc *huc)
{
return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
- !intel_huc_is_authenticated(huc);
+ !intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC);
}
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 534b0aa43316..e608152fecfc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -5,10 +5,241 @@
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
+#include "intel_gsc_binary_headers.h"
+#include "intel_gsc_uc_heci_cmd_submit.h"
#include "intel_huc.h"
#include "intel_huc_fw.h"
+#include "intel_huc_print.h"
#include "i915_drv.h"
#include "pxp/intel_pxp_huc.h"
+#include "pxp/intel_pxp_cmd_interface_43.h"
+
+struct mtl_huc_auth_msg_in {
+ struct intel_gsc_mtl_header header;
+ struct pxp43_new_huc_auth_in huc_in;
+} __packed;
+
+struct mtl_huc_auth_msg_out {
+ struct intel_gsc_mtl_header header;
+ struct pxp43_huc_auth_out huc_out;
+} __packed;
+
+int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ struct drm_i915_private *i915 = gt->i915;
+ struct drm_i915_gem_object *obj;
+ struct mtl_huc_auth_msg_in *msg_in;
+ struct mtl_huc_auth_msg_out *msg_out;
+ void *pkt_vaddr;
+ u64 pkt_offset;
+ int retry = 5;
+ int err = 0;
+
+ if (!huc->heci_pkt)
+ return -ENODEV;
+
+ obj = huc->heci_pkt->obj;
+ pkt_offset = i915_ggtt_offset(huc->heci_pkt);
+
+ pkt_vaddr = i915_gem_object_pin_map_unlocked(obj,
+ i915_coherent_map_type(i915, obj, true));
+ if (IS_ERR(pkt_vaddr))
+ return PTR_ERR(pkt_vaddr);
+
+ msg_in = pkt_vaddr;
+ msg_out = pkt_vaddr + PXP43_HUC_AUTH_INOUT_SIZE;
+
+ intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header,
+ HECI_MEADDRESS_PXP,
+ sizeof(*msg_in), 0);
+
+ msg_in->huc_in.header.api_version = PXP_APIVER(4, 3);
+ msg_in->huc_in.header.command_id = PXP43_CMDID_NEW_HUC_AUTH;
+ msg_in->huc_in.header.status = 0;
+ msg_in->huc_in.header.buffer_len = sizeof(msg_in->huc_in) -
+ sizeof(msg_in->huc_in.header);
+ msg_in->huc_in.huc_base_address = huc->fw.vma_res.start;
+ msg_in->huc_in.huc_size = huc->fw.obj->base.size;
+
+ do {
+ err = intel_gsc_uc_heci_cmd_submit_packet(&gt->uc.gsc,
+ pkt_offset, sizeof(*msg_in),
+ pkt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
+ PXP43_HUC_AUTH_INOUT_SIZE);
+ if (err) {
+ huc_err(huc, "failed to submit GSC request to auth: %d\n", err);
+ goto out_unpin;
+ }
+
+ if (msg_out->header.flags & GSC_OUTFLAG_MSG_PENDING) {
+ msg_in->header.gsc_message_handle = msg_out->header.gsc_message_handle;
+ err = -EBUSY;
+ msleep(50);
+ }
+ } while (--retry && err == -EBUSY);
+
+ if (err)
+ goto out_unpin;
+
+ if (msg_out->header.message_size != sizeof(*msg_out)) {
+ huc_err(huc, "invalid GSC reply length %u [expected %zu]\n",
+ msg_out->header.message_size, sizeof(*msg_out));
+ err = -EPROTO;
+ goto out_unpin;
+ }
+
+ /*
+ * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
+ * loaded. If the same error is ever returned with HuC not loaded we'll
+ * still catch it when we check the authentication bit later.
+ */
+ if (msg_out->huc_out.header.status != PXP_STATUS_SUCCESS &&
+ msg_out->huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) {
+ huc_err(huc, "auth failed with GSC error = 0x%x\n",
+ msg_out->huc_out.header.status);
+ err = -EIO;
+ goto out_unpin;
+ }
+
+out_unpin:
+ i915_gem_object_unpin_map(obj);
+ return err;
+}
+
+static void get_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, const void *data)
+{
+ const struct intel_gsc_manifest_header *manifest = data;
+
+ ver->major = manifest->fw_version.major;
+ ver->minor = manifest->fw_version.minor;
+ ver->patch = manifest->fw_version.hotfix;
+}
+
+static bool css_valid(const void *data, size_t size)
+{
+ const struct uc_css_header *css = data;
+
+ if (unlikely(size < sizeof(struct uc_css_header)))
+ return false;
+
+ if (css->module_type != 0x6)
+ return false;
+
+ if (css->module_vendor != PCI_VENDOR_ID_INTEL)
+ return false;
+
+ return true;
+}
+
+static inline u32 entry_offset(const struct intel_gsc_cpd_entry *entry)
+{
+ return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK;
+}
+
+int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size)
+{
+ struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
+ const struct intel_gsc_cpd_header_v2 *header = data;
+ const struct intel_gsc_cpd_entry *entry;
+ size_t min_size = sizeof(*header);
+ int i;
+
+ if (!huc_fw->has_gsc_headers) {
+ huc_err(huc, "Invalid FW type for GSC header parsing!\n");
+ return -EINVAL;
+ }
+
+ if (size < sizeof(*header)) {
+ huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
+ return -ENODATA;
+ }
+
+ /*
+ * The GSC-enabled HuC binary starts with a directory header, followed
+ * by a series of entries. Each entry is identified by a name and
+ * points to a specific section of the binary containing the relevant
+ * data. The entries we're interested in are:
+ * - "HUCP.man": points to the GSC manifest header for the HuC, which
+ * contains the version info.
+ * - "huc_fw": points to the legacy-style binary that can be used for
+ * load via the DMA. This entry only contains a valid CSS
+ * on binaries for platforms that support 2-step HuC load
+ * via dma and auth via GSC (like MTL).
+ *
+ * --------------------------------------------------
+ * [ intel_gsc_cpd_header_v2 ]
+ * --------------------------------------------------
+ * [ intel_gsc_cpd_entry[] ]
+ * [ entry1 ]
+ * [ ... ]
+ * [ entryX ]
+ * [ "HUCP.man" ]
+ * [ ... ]
+ * [ offset >----------------------------]------o
+ * [ ... ] |
+ * [ entryY ] |
+ * [ "huc_fw" ] |
+ * [ ... ] |
+ * [ offset >----------------------------]----------o
+ * -------------------------------------------------- | |
+ * | |
+ * -------------------------------------------------- | |
+ * [ intel_gsc_manifest_header ]<-----o |
+ * [ ... ] |
+ * [ intel_gsc_version fw_version ] |
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ data[] ]<---------o
+ * [ ... ]
+ * [ ... ]
+ * --------------------------------------------------
+ */
+
+ if (header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) {
+ huc_err(huc, "invalid marker for CPD header: 0x%08x!\n",
+ header->header_marker);
+ return -EINVAL;
+ }
+
+ /* we only have binaries with header v2 and entry v1 for now */
+ if (header->header_version != 2 || header->entry_version != 1) {
+ huc_err(huc, "invalid CPD header/entry version %u:%u!\n",
+ header->header_version, header->entry_version);
+ return -EINVAL;
+ }
+
+ if (header->header_length < sizeof(struct intel_gsc_cpd_header_v2)) {
+ huc_err(huc, "invalid CPD header length %u!\n",
+ header->header_length);
+ return -EINVAL;
+ }
+
+ min_size = header->header_length + sizeof(*entry) * header->num_of_entries;
+ if (size < min_size) {
+ huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
+ return -ENODATA;
+ }
+
+ entry = data + header->header_length;
+
+ for (i = 0; i < header->num_of_entries; i++, entry++) {
+ if (strcmp(entry->name, "HUCP.man") == 0)
+ get_version_from_gsc_manifest(&huc_fw->file_selected.ver,
+ data + entry_offset(entry));
+
+ if (strcmp(entry->name, "huc_fw") == 0) {
+ u32 offset = entry_offset(entry);
+
+ if (offset < size && css_valid(data + offset, size - offset))
+ huc_fw->dma_start_offset = offset;
+ }
+ }
+
+ return 0;
+}
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
{
@@ -25,7 +256,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
* component gets re-bound and this function called again. If so, just
* mark the HuC as loaded.
*/
- if (intel_huc_is_authenticated(huc)) {
+ if (intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC)) {
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
return 0;
}
@@ -38,7 +269,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
- return intel_huc_wait_for_auth_complete(huc);
+ return intel_huc_wait_for_auth_complete(huc, INTEL_HUC_AUTH_BY_GSC);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
index db42e238b45f..307ab45e6b09 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -7,8 +7,12 @@
#define _INTEL_HUC_FW_H_
struct intel_huc;
+struct intel_uc_fw;
+
+#include <linux/types.h>
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
+int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
-
+int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_print.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_print.h
new file mode 100644
index 000000000000..915d310ee1df
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_print.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_HUC_PRINT__
+#define __INTEL_HUC_PRINT__
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
+
+#define huc_printk(_huc, _level, _fmt, ...) \
+ gt_##_level(huc_to_gt(_huc), "HuC: " _fmt, ##__VA_ARGS__)
+#define huc_err(_huc, _fmt, ...) huc_printk((_huc), err, _fmt, ##__VA_ARGS__)
+#define huc_warn(_huc, _fmt, ...) huc_printk((_huc), warn, _fmt, ##__VA_ARGS__)
+#define huc_notice(_huc, _fmt, ...) huc_printk((_huc), notice, _fmt, ##__VA_ARGS__)
+#define huc_info(_huc, _fmt, ...) huc_printk((_huc), info, _fmt, ##__VA_ARGS__)
+#define huc_dbg(_huc, _fmt, ...) huc_printk((_huc), dbg, _fmt, ##__VA_ARGS__)
+#define huc_probe_error(_huc, _fmt, ...) huc_printk((_huc), probe_error, _fmt, ##__VA_ARGS__)
+
+#endif /* __INTEL_HUC_PRINT__ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index c8b9cbb7ba3a..18250fb64bd8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -538,7 +538,7 @@ static int __uc_init_hw(struct intel_uc *uc)
if (intel_huc_is_loaded_by_gsc(huc))
intel_huc_update_auth_status(huc);
else
- intel_huc_auth(huc);
+ intel_huc_auth(huc, INTEL_HUC_AUTH_BY_GUC);
if (intel_uc_uses_guc_submission(uc)) {
ret = intel_guc_submission_enable(guc);
@@ -700,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc)
}
}
+static void __uc_resume_mappings(struct intel_uc *uc)
+{
+ intel_uc_fw_resume_mapping(&uc->guc.fw);
+ intel_uc_fw_resume_mapping(&uc->huc.fw);
+}
+
static int __uc_resume(struct intel_uc *uc, bool enable_communication)
{
struct intel_guc *guc = &uc->guc;
@@ -767,4 +773,6 @@ static const struct intel_uc_ops uc_ops_on = {
.init_hw = __uc_init_hw,
.fini_hw = __uc_fini_hw,
+
+ .resume_mappings = __uc_resume_mappings,
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index d585524d94de..014bb7d83689 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -24,6 +24,7 @@ struct intel_uc_ops {
void (*fini)(struct intel_uc *uc);
int (*init_hw)(struct intel_uc *uc);
void (*fini_hw)(struct intel_uc *uc);
+ void (*resume_mappings)(struct intel_uc *uc);
};
struct intel_uc {
@@ -114,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0);
intel_uc_ops_function(fini, fini, void, );
intel_uc_ops_function(init_hw, init_hw, int, 0);
intel_uc_ops_function(fini_hw, fini_hw, void, );
+intel_uc_ops_function(resume_mappings, resume_mappings, void, );
#undef intel_uc_ops_function
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index dc5c96c503a9..944725e62414 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -108,6 +108,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1))
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
+ fw_def(METEORLAKE, 0, huc_gsc(mtl)) \
fw_def(DG2, 0, huc_gsc(dg2)) \
fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \
fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \
@@ -186,7 +187,7 @@ struct __packed uc_fw_blob {
u8 major;
u8 minor;
u8 patch;
- bool loaded_via_gsc;
+ bool has_gsc_headers;
};
#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
@@ -197,7 +198,7 @@ struct __packed uc_fw_blob {
#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
- .legacy = false, .loaded_via_gsc = gsc_ }
+ .legacy = false, .has_gsc_headers = gsc_ }
#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
@@ -310,7 +311,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
uc_fw->file_wanted.ver.major = blob->major;
uc_fw->file_wanted.ver.minor = blob->minor;
uc_fw->file_wanted.ver.patch = blob->patch;
- uc_fw->loaded_via_gsc = blob->loaded_via_gsc;
+ uc_fw->has_gsc_headers = blob->has_gsc_headers;
found = true;
break;
}
@@ -471,12 +472,14 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc
* intel_uc_fw_init_early - initialize the uC object and select the firmware
* @uc_fw: uC firmware
* @type: type of uC
+ * @needs_ggtt_mapping: whether the FW needs to be GGTT mapped for loading
*
* Initialize the state of our uC object and relevant tracking and select the
* firmware to fetch and load.
*/
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type)
+ enum intel_uc_fw_type type,
+ bool needs_ggtt_mapping)
{
struct intel_gt *gt = ____uc_fw_to_gt(uc_fw, type);
struct drm_i915_private *i915 = gt->i915;
@@ -490,6 +493,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
GEM_BUG_ON(uc_fw->file_selected.path);
uc_fw->type = type;
+ uc_fw->needs_ggtt_mapping = needs_ggtt_mapping;
if (HAS_GT_UC(i915)) {
if (!validate_fw_table_type(i915, type)) {
@@ -545,33 +549,6 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
}
}
-static int check_gsc_manifest(struct intel_gt *gt,
- const struct firmware *fw,
- struct intel_uc_fw *uc_fw)
-{
- u32 *dw = (u32 *)fw->data;
- u32 version_hi, version_lo;
- size_t min_size;
-
- /* Check the size of the blob before examining buffer contents */
- min_size = sizeof(u32) * (HUC_GSC_VERSION_LO_DW + 1);
- if (unlikely(fw->size < min_size)) {
- gt_warn(gt, "%s firmware %s: invalid size: %zu < %zu\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, min_size);
- return -ENODATA;
- }
-
- version_hi = dw[HUC_GSC_VERSION_HI_DW];
- version_lo = dw[HUC_GSC_VERSION_LO_DW];
-
- uc_fw->file_selected.ver.major = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi);
- uc_fw->file_selected.ver.minor = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi);
- uc_fw->file_selected.ver.patch = FIELD_GET(HUC_GSC_PATCH_VER_LO_MASK, version_lo);
-
- return 0;
-}
-
static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 css_value)
{
/* Get version numbers from the CSS header */
@@ -628,22 +605,22 @@ static void guc_read_css_info(struct intel_uc_fw *uc_fw, struct uc_css_header *c
uc_fw->private_data_size = css->private_data_size;
}
-static int check_ccs_header(struct intel_gt *gt,
- const struct firmware *fw,
- struct intel_uc_fw *uc_fw)
+static int __check_ccs_header(struct intel_gt *gt,
+ const void *fw_data, size_t fw_size,
+ struct intel_uc_fw *uc_fw)
{
struct uc_css_header *css;
size_t size;
/* Check the size of the blob before examining buffer contents */
- if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+ if (unlikely(fw_size < sizeof(struct uc_css_header))) {
gt_warn(gt, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, sizeof(struct uc_css_header));
+ fw_size, sizeof(struct uc_css_header));
return -ENODATA;
}
- css = (struct uc_css_header *)fw->data;
+ css = (struct uc_css_header *)fw_data;
/* Check integrity of size values inside CSS header */
size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
@@ -651,7 +628,7 @@ static int check_ccs_header(struct intel_gt *gt,
if (unlikely(size != sizeof(struct uc_css_header))) {
gt_warn(gt, "%s firmware %s: unexpected header size: %zu != %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, sizeof(struct uc_css_header));
+ fw_size, sizeof(struct uc_css_header));
return -EPROTO;
}
@@ -663,10 +640,10 @@ static int check_ccs_header(struct intel_gt *gt,
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
- if (unlikely(fw->size < size)) {
+ if (unlikely(fw_size < size)) {
gt_warn(gt, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, size);
+ fw_size, size);
return -ENOEXEC;
}
@@ -687,6 +664,33 @@ static int check_ccs_header(struct intel_gt *gt,
return 0;
}
+static int check_gsc_manifest(struct intel_gt *gt,
+ const struct firmware *fw,
+ struct intel_uc_fw *uc_fw)
+{
+ if (uc_fw->type != INTEL_UC_FW_TYPE_HUC) {
+ gt_err(gt, "trying to GSC-parse a non-HuC binary");
+ return -EINVAL;
+ }
+
+ intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+
+ if (uc_fw->dma_start_offset) {
+ u32 delta = uc_fw->dma_start_offset;
+
+ __check_ccs_header(gt, fw->data + delta, fw->size - delta, uc_fw);
+ }
+
+ return 0;
+}
+
+static int check_ccs_header(struct intel_gt *gt,
+ const struct firmware *fw,
+ struct intel_uc_fw *uc_fw)
+{
+ return __check_ccs_header(gt, fw->data, fw->size, uc_fw);
+}
+
static bool is_ver_8bit(struct intel_uc_fw_ver *ver)
{
return ver->major < 0xFF && ver->minor < 0xFF && ver->patch < 0xFF;
@@ -734,7 +738,7 @@ static int check_fw_header(struct intel_gt *gt,
if (uc_fw->type == INTEL_UC_FW_TYPE_GSC)
return 0;
- if (uc_fw->loaded_via_gsc)
+ if (uc_fw->has_gsc_headers)
err = check_gsc_manifest(gt, fw, uc_fw);
else
err = check_ccs_header(gt, fw, uc_fw);
@@ -755,7 +759,7 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
if (err)
return err;
- if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) {
+ if (uc_fw->needs_ggtt_mapping && (*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) {
gt_err(gt, "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
(*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K);
@@ -940,29 +944,32 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- struct i915_vma_resource *dummy = &uc_fw->dummy;
+ struct i915_vma_resource *vma_res = &uc_fw->vma_res;
u32 pte_flags = 0;
- dummy->start = uc_fw_ggtt_offset(uc_fw);
- dummy->node_size = obj->base.size;
- dummy->bi.pages = obj->mm.pages;
+ if (!uc_fw->needs_ggtt_mapping)
+ return;
+
+ vma_res->start = uc_fw_ggtt_offset(uc_fw);
+ vma_res->node_size = obj->base.size;
+ vma_res->bi.pages = obj->mm.pages;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
/* uc_fw->obj cache domains were not controlled across suspend */
if (i915_gem_object_has_struct_page(obj))
- drm_clflush_sg(dummy->bi.pages);
+ drm_clflush_sg(vma_res->bi.pages);
if (i915_gem_object_is_lmem(obj))
pte_flags |= PTE_LM;
if (ggtt->vm.raw_insert_entries)
- ggtt->vm.raw_insert_entries(&ggtt->vm, dummy,
+ ggtt->vm.raw_insert_entries(&ggtt->vm, vma_res,
i915_gem_get_pat_index(ggtt->vm.i915,
I915_CACHE_NONE),
pte_flags);
else
- ggtt->vm.insert_entries(&ggtt->vm, dummy,
+ ggtt->vm.insert_entries(&ggtt->vm, vma_res,
i915_gem_get_pat_index(ggtt->vm.i915,
I915_CACHE_NONE),
pte_flags);
@@ -970,11 +977,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
{
- struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- u64 start = uc_fw_ggtt_offset(uc_fw);
+ struct i915_vma_resource *vma_res = &uc_fw->vma_res;
+
+ if (!vma_res->node_size)
+ return;
- ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
+ ggtt->vm.clear_range(&ggtt->vm, vma_res->start, vma_res->node_size);
}
static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
@@ -991,7 +1000,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
/* Set the source address for the uCode */
- offset = uc_fw_ggtt_offset(uc_fw);
+ offset = uc_fw->vma_res.start + uc_fw->dma_start_offset;
GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
@@ -1065,9 +1074,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
return -ENOEXEC;
/* Call custom loader */
- uc_fw_bind_ggtt(uc_fw);
err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
- uc_fw_unbind_ggtt(uc_fw);
if (err)
goto fail;
@@ -1171,6 +1178,8 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
goto out_unpin;
}
+ uc_fw_bind_ggtt(uc_fw);
+
return 0;
out_unpin:
@@ -1181,6 +1190,7 @@ out:
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
+ uc_fw_unbind_ggtt(uc_fw);
uc_fw_rsa_data_destroy(uc_fw);
if (i915_gem_object_has_pinned_pages(uc_fw->obj))
@@ -1189,6 +1199,17 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
}
+void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw)
+{
+ if (!intel_uc_fw_is_available(uc_fw))
+ return;
+
+ if (!i915_gem_object_has_pinned_pages(uc_fw->obj))
+ return;
+
+ uc_fw_bind_ggtt(uc_fw);
+}
+
/**
* intel_uc_fw_cleanup_fetch - cleanup uC firmware
* @uc_fw: uC firmware
@@ -1218,7 +1239,7 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
{
struct intel_memory_region *mr = uc_fw->obj->mm.region;
u32 size = min_t(u32, uc_fw->rsa_size, max_len);
- u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
+ u32 offset = uc_fw->dma_start_offset + sizeof(struct uc_css_header) + uc_fw->ucode_size;
struct sgt_iter iter;
size_t count = 0;
int idx;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 6ba00e6b3975..054f02811971 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -99,20 +99,28 @@ struct intel_uc_fw {
struct drm_i915_gem_object *obj;
/**
- * @dummy: A vma used in binding the uc fw to ggtt. We can't define this
- * vma on the stack as it can lead to a stack overflow, so we define it
- * here. Safe to have 1 copy per uc fw because the binding is single
- * threaded as it done during driver load (inherently single threaded)
- * or during a GT reset (mutex guarantees single threaded).
+ * @needs_ggtt_mapping: indicates whether the fw object needs to be
+ * pinned to ggtt. If true, the fw is pinned at init time and unpinned
+ * during driver unload.
*/
- struct i915_vma_resource dummy;
+ bool needs_ggtt_mapping;
+
+ /**
+ * @vma_res: A vma resource used in binding the uc fw to ggtt. The fw is
+ * pinned in a reserved area of the ggtt (above the maximum address
+ * usable by GuC); therefore, we can't use the normal vma functions to
+ * do the pinning and we instead use this resource to do so.
+ */
+ struct i915_vma_resource vma_res;
struct i915_vma *rsa_data;
u32 rsa_size;
u32 ucode_size;
u32 private_data_size;
- bool loaded_via_gsc;
+ u32 dma_start_offset;
+
+ bool has_gsc_headers;
};
/*
@@ -282,12 +290,14 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
}
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type);
+ enum intel_uc_fw_type type,
+ bool needs_ggtt_mapping);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
+void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw);
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err);
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index 646fa8aa6cf1..7fe405126249 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -84,10 +84,4 @@ struct uc_css_header {
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
-#define HUC_GSC_VERSION_HI_DW 44
-#define HUC_GSC_MAJOR_VER_HI_MASK (0xFF << 0)
-#define HUC_GSC_MINOR_VER_HI_MASK (0xFF << 16)
-#define HUC_GSC_VERSION_LO_DW 45
-#define HUC_GSC_PATCH_VER_LO_MASK (0xFF << 0)
-
#endif /* _INTEL_UC_FW_ABI_H */
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 8e92649124d4..97244541ec28 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -243,8 +243,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
if (ret < 0)
goto err_rootgt;
- i915_drm_clients_init(&dev_priv->clients, dev_priv);
-
i915_gem_init_early(dev_priv);
/* This must be called before any calls to HAS_PCH_* */
@@ -278,7 +276,6 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
intel_power_domains_cleanup(dev_priv);
i915_gem_cleanup_early(dev_priv);
intel_gt_driver_late_release_all(dev_priv);
- i915_drm_clients_fini(&dev_priv->clients);
intel_region_ttm_device_fini(dev_priv);
vlv_suspend_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
@@ -1706,7 +1703,7 @@ static const struct file_operations i915_driver_fops = {
.compat_ioctl = i915_ioc32_compat_ioctl,
.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
- .show_fdinfo = i915_drm_client_fdinfo,
+ .show_fdinfo = drm_show_fdinfo,
#endif
};
@@ -1806,6 +1803,7 @@ static const struct drm_driver i915_drm_driver = {
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
.postclose = i915_driver_postclose,
+ .show_fdinfo = i915_drm_client_fdinfo,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index d18d0a3ed905..2a44b3876cb5 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -17,64 +17,29 @@
#include "i915_gem.h"
#include "i915_utils.h"
-void i915_drm_clients_init(struct i915_drm_clients *clients,
- struct drm_i915_private *i915)
-{
- clients->i915 = i915;
- clients->next_id = 0;
-
- xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
-}
-
-struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients)
+struct i915_drm_client *i915_drm_client_alloc(void)
{
struct i915_drm_client *client;
- struct xarray *xa = &clients->xarray;
- int ret;
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client)
- return ERR_PTR(-ENOMEM);
-
- xa_lock_irq(xa);
- ret = __xa_alloc_cyclic(xa, &client->id, client, xa_limit_32b,
- &clients->next_id, GFP_KERNEL);
- xa_unlock_irq(xa);
- if (ret < 0)
- goto err;
+ return NULL;
kref_init(&client->kref);
spin_lock_init(&client->ctx_lock);
INIT_LIST_HEAD(&client->ctx_list);
- client->clients = clients;
return client;
-
-err:
- kfree(client);
-
- return ERR_PTR(ret);
}
void __i915_drm_client_free(struct kref *kref)
{
struct i915_drm_client *client =
container_of(kref, typeof(*client), kref);
- struct xarray *xa = &client->clients->xarray;
- unsigned long flags;
- xa_lock_irqsave(xa, flags);
- __xa_erase(xa, client->id);
- xa_unlock_irqrestore(xa, flags);
kfree(client);
}
-void i915_drm_clients_fini(struct i915_drm_clients *clients)
-{
- GEM_BUG_ON(!xa_empty(&clients->xarray));
- xa_destroy(&clients->xarray);
-}
-
#ifdef CONFIG_PROC_FS
static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_RENDER] = "render",
@@ -101,38 +66,34 @@ static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
}
static void
-show_client_class(struct seq_file *m,
+show_client_class(struct drm_printer *p,
+ struct drm_i915_private *i915,
struct i915_drm_client *client,
unsigned int class)
{
- const struct list_head *list = &client->ctx_list;
+ const unsigned int capacity = i915->engine_uabi_class_count[class];
u64 total = atomic64_read(&client->past_runtime[class]);
- const unsigned int capacity =
- client->clients->i915->engine_uabi_class_count[class];
struct i915_gem_context *ctx;
rcu_read_lock();
- list_for_each_entry_rcu(ctx, list, client_link)
+ list_for_each_entry_rcu(ctx, &client->ctx_list, client_link)
total += busy_add(ctx, class);
rcu_read_unlock();
if (capacity)
- seq_printf(m, "drm-engine-%s:\t%llu ns\n",
+ drm_printf(p, "drm-engine-%s:\t%llu ns\n",
uabi_class_names[class], total);
if (capacity > 1)
- seq_printf(m, "drm-engine-capacity-%s:\t%u\n",
+ drm_printf(p, "drm-engine-capacity-%s:\t%u\n",
uabi_class_names[class],
capacity);
}
-void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
+void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
{
- struct drm_file *file = f->private_data;
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_private *i915 = file_priv->i915;
- struct i915_drm_client *client = file_priv->client;
- struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
unsigned int i;
/*
@@ -141,16 +102,10 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
* ******************************************************************
*/
- seq_printf(m, "drm-driver:\t%s\n", i915->drm.driver->name);
- seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n",
- pci_domain_nr(pdev->bus), pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
- seq_printf(m, "drm-client-id:\t%u\n", client->id);
-
if (GRAPHICS_VER(i915) < 8)
return;
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
- show_client_class(m, client, i);
+ show_client_class(p, i915, file_priv->client, i);
}
#endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 69496af996d9..4c18b99e10a4 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -9,20 +9,13 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>
-#include <linux/xarray.h>
#include <uapi/drm/i915_drm.h>
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
-struct drm_i915_private;
-
-struct i915_drm_clients {
- struct drm_i915_private *i915;
-
- struct xarray xarray;
- u32 next_id;
-};
+struct drm_file;
+struct drm_printer;
struct i915_drm_client {
struct kref kref;
@@ -32,17 +25,12 @@ struct i915_drm_client {
spinlock_t ctx_lock; /* For add/remove from ctx_list. */
struct list_head ctx_list; /* List of contexts belonging to client. */
- struct i915_drm_clients *clients;
-
/**
* @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
*/
atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
};
-void i915_drm_clients_init(struct i915_drm_clients *clients,
- struct drm_i915_private *i915);
-
static inline struct i915_drm_client *
i915_drm_client_get(struct i915_drm_client *client)
{
@@ -57,12 +45,10 @@ static inline void i915_drm_client_put(struct i915_drm_client *client)
kref_put(&client->kref, __i915_drm_client_free);
}
-struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients);
+struct i915_drm_client *i915_drm_client_alloc(void);
#ifdef CONFIG_PROC_FS
-void i915_drm_client_fdinfo(struct seq_file *m, struct file *f);
+void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
#endif
-void i915_drm_clients_fini(struct i915_drm_clients *clients);
-
#endif /* !__I915_DRM_CLIENT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f1205ed3ba71..b457a37e67c4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -314,7 +314,7 @@ struct drm_i915_private {
/*
* i915->gt[0] == &i915->gt0
*/
-#define I915_MAX_GT 4
+#define I915_MAX_GT 2
struct intel_gt *gt[I915_MAX_GT];
struct kobject *sysfs_gt;
@@ -348,8 +348,6 @@ struct drm_i915_private {
struct i915_pmu pmu;
- struct i915_drm_clients clients;
-
/* The TTM device structure. */
struct ttm_device bdev;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e70b762f0b03..1f65bb33dd21 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1325,11 +1325,9 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
if (!file_priv)
goto err_alloc;
- client = i915_drm_client_add(&i915->clients);
- if (IS_ERR(client)) {
- ret = PTR_ERR(client);
+ client = i915_drm_client_alloc();
+ if (!client)
goto err_client;
- }
file->driver_priv = file_priv;
file_priv->i915 = i915;
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 6f11d7eaa91a..890f2b382bee 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -100,7 +100,11 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = sseu->min_eu_in_pool;
break;
case I915_PARAM_HUC_STATUS:
- value = intel_huc_check_status(&to_gt(i915)->uc.huc);
+ /* On platform with a media GT, the HuC is on that GT */
+ if (i915->media_gt)
+ value = intel_huc_check_status(&i915->media_gt->uc.huc);
+ else
+ value = intel_huc_check_status(&to_gt(i915)->uc.huc);
if (value < 0)
return value;
break;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 19d5652300ee..0a111b281578 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
*
* Besides returning true when there is data available to read() this function
- * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
- * object.
+ * also updates the tail in the oa_buffer object.
*
* Note: It's safe to read OA config state here unlocked, assuming that this is
* only called while the stream is enabled, while the global OA configuration
@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format->size;
+ u32 head, tail, read_tail;
unsigned long flags;
bool pollin;
u32 hw_tail;
- u64 now;
u32 partial_report_size;
/* We have to consider the (unlikely) possibility that read() errors
@@ -566,64 +565,48 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
partial_report_size %= report_size;
/* Subtract partial amount off the tail */
- hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
+ hw_tail = OA_TAKEN(hw_tail, partial_report_size);
- now = ktime_get_mono_fast_ns();
+ /* NB: The head we observe here might effectively be a little
+ * out of date. If a read() is in progress, the head could be
+ * anywhere between this head and stream->oa_buffer.tail.
+ */
+ head = stream->oa_buffer.head - gtt_offset;
+ read_tail = stream->oa_buffer.tail - gtt_offset;
- if (hw_tail == stream->oa_buffer.aging_tail &&
- (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
- /* If the HW tail hasn't move since the last check and the HW
- * tail has been aging for long enough, declare it the new
- * tail.
- */
- stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
- } else {
- u32 head, tail, aged_tail;
+ tail = hw_tail;
- /* NB: The head we observe here might effectively be a little
- * out of date. If a read() is in progress, the head could be
- * anywhere between this head and stream->oa_buffer.tail.
- */
- head = stream->oa_buffer.head - gtt_offset;
- aged_tail = stream->oa_buffer.tail - gtt_offset;
-
- hw_tail -= gtt_offset;
- tail = hw_tail;
-
- /* Walk the stream backward until we find a report with report
- * id and timestmap not at 0. Since the circular buffer pointers
- * progress by increments of 64 bytes and that reports can be up
- * to 256 bytes long, we can't tell whether a report has fully
- * landed in memory before the report id and timestamp of the
- * following report have effectively landed.
- *
- * This is assuming that the writes of the OA unit land in
- * memory in the order they were written to.
- * If not : (╯°□°)╯︵ ┻━┻
- */
- while (OA_TAKEN(tail, aged_tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
+ /* Walk the stream backward until we find a report with report
+ * id and timestmap not at 0. Since the circular buffer pointers
+ * progress by increments of 64 bytes and that reports can be up
+ * to 256 bytes long, we can't tell whether a report has fully
+ * landed in memory before the report id and timestamp of the
+ * following report have effectively landed.
+ *
+ * This is assuming that the writes of the OA unit land in
+ * memory in the order they were written to.
+ * If not : (╯°□°)╯︵ ┻━┻
+ */
+ while (OA_TAKEN(tail, read_tail) >= report_size) {
+ void *report = stream->oa_buffer.vaddr + tail;
- if (oa_report_id(stream, report) ||
- oa_timestamp(stream, report))
- break;
+ if (oa_report_id(stream, report) ||
+ oa_timestamp(stream, report))
+ break;
- tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
- }
+ tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
+ }
- if (OA_TAKEN(hw_tail, tail) > report_size &&
- __ratelimit(&stream->perf->tail_pointer_race))
- drm_notice(&stream->uncore->i915->drm,
- "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
- head, tail, hw_tail);
+ if (OA_TAKEN(hw_tail, tail) > report_size &&
+ __ratelimit(&stream->perf->tail_pointer_race))
+ drm_notice(&stream->uncore->i915->drm,
+ "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
+ head, tail, hw_tail);
- stream->oa_buffer.tail = gtt_offset + tail;
- stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
- stream->oa_buffer.aging_timestamp = now;
- }
+ stream->oa_buffer.tail = gtt_offset + tail;
- pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
- stream->oa_buffer.head - gtt_offset) >= report_size;
+ pollin = OA_TAKEN(stream->oa_buffer.tail,
+ stream->oa_buffer.head) >= report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -877,12 +860,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
- /*
- * Clear out the report id and timestamp as a means to detect unlanded
- * reports.
- */
- oa_report_id_clear(stream, report32);
- oa_timestamp_clear(stream, report32);
+ if (is_power_of_2(report_size)) {
+ /*
+ * Clear out the report id and timestamp as a means
+ * to detect unlanded reports.
+ */
+ oa_report_id_clear(stream, report32);
+ oa_timestamp_clear(stream, report32);
+ } else {
+ /* Zero out the entire report */
+ memset(report32, 0, report_size);
+ }
}
if (start_offset != *offset) {
@@ -1722,7 +1710,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -1774,7 +1761,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset;
/*
@@ -1828,7 +1814,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset;
/*
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 66dd5f74de05..fe3a5dae8c22 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -313,18 +313,6 @@ struct i915_perf_stream {
spinlock_t ptr_lock;
/**
- * @aging_tail: The last HW tail reported by HW. The data
- * might not have made it to memory yet though.
- */
- u32 aging_tail;
-
- /**
- * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
- * was read; used to determine when it is old enough to trust.
- */
- u64 aging_timestamp;
-
- /**
* @head: Although we can always read back the head pointer register,
* we prefer to avoid trusting the HW state, just to avoid any
* risk that some hardware condition could * somehow bump the
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a814583e19fd..d35973b41186 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -132,14 +132,14 @@ static u32 frequency_enabled_mask(void)
unsigned int i;
u32 mask = 0;
- for (i = 0; i < I915_PMU_MAX_GTS; i++)
+ for (i = 0; i < I915_PMU_MAX_GT; i++)
mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
return mask;
}
-static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu)
{
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
u32 enable;
@@ -158,16 +158,10 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
/*
- * When the GPU is idle per-engine counters do not need to be
- * running so clear those bits out.
- */
- if (!gpu_active)
- enable &= ~ENGINE_SAMPLE_MASK;
- /*
* Also there is software busyness tracking available we do not
* need the timer for I915_SAMPLE_BUSY counter.
*/
- else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+ if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
enable &= ~BIT(I915_SAMPLE_BUSY);
/*
@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
- unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
- GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
- return idx;
-}
-
static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
- return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+ return pmu->sample[gt_id][sample].cur;
}
static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{
- pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+ pmu->sample[gt_id][sample].cur = val;
}
static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
- pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
+ pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
}
static u64 get_rc6(struct intel_gt *gt)
@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt)
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
- if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+ if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
pmu->timer_enabled = true;
pmu->timer_last = ktime_get();
hrtimer_start_range_ns(&pmu->timer,
@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
*/
pmu->unparked &= ~BIT(gt->info.id);
if (pmu->unparked == 0)
- pmu->timer_enabled = pmu_needs_timer(pmu, false);
+ pmu->timer_enabled = false;
spin_unlock_irq(&pmu->lock);
}
@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event)
*/
if (--pmu->enable_count[bit] == 0) {
pmu->enable &= ~BIT(bit);
- pmu->timer_enabled &= pmu_needs_timer(pmu, true);
+ pmu->timer_enabled &= pmu_needs_timer(pmu);
}
spin_unlock_irqrestore(&pmu->lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8b..41af038c3738 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -38,7 +38,7 @@ enum {
__I915_NUM_PMU_SAMPLERS
};
-#define I915_PMU_MAX_GTS 2
+#define I915_PMU_MAX_GT 2
/*
* How many different events we track in the global PMU mask.
@@ -47,7 +47,7 @@ enum {
*/
#define I915_PMU_MASK_BITS \
(I915_ENGINE_SAMPLE_COUNT + \
- I915_PMU_MAX_GTS * __I915_PMU_TRACKED_EVENT_COUNT)
+ I915_PMU_MAX_GT * __I915_PMU_TRACKED_EVENT_COUNT)
#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
@@ -127,11 +127,11 @@ struct i915_pmu {
* Only global counters are held here, while the per-engine ones are in
* struct intel_engine_cs.
*/
- struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
+ struct i915_pmu_sample sample[I915_PMU_MAX_GT][__I915_NUM_PMU_SAMPLERS];
/**
* @sleep_last: Last time GT parked for RC6 estimation.
*/
- ktime_t sleep_last[I915_PMU_MAX_GTS];
+ ktime_t sleep_last[I915_PMU_MAX_GT];
/**
* @irq_count: Number of interrupts
*
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0523418129c5..8ed7c39c2b30 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -941,6 +941,9 @@
#define HECI_H_GS1(base) _MMIO((base) + 0xc4c)
#define HECI_H_GS1_ER_PREP REG_BIT(0)
+#define HECI_FWSTS5(base) _MMIO((base) + 0xc68)
+#define HECI_FWSTS5_HUC_AUTH_DONE (1 << 19)
+
#define HSW_GTT_CACHE_EN _MMIO(0x4024)
#define GTT_CACHE_EN_ALL 0xF0007FFF
#define GEN7_WR_WATERMARK _MMIO(0x4028)
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
index 09777719cd84..0165d38fbead 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
@@ -11,19 +11,30 @@
/* PXP-Cmd-Op definitions */
#define PXP43_CMDID_START_HUC_AUTH 0x0000003A
+#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
#define PXP43_CMDID_INIT_SESSION 0x00000036
/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */
#define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K)
-/* PXP-Input-Packet: HUC-Authentication */
+/* PXP-Packet size for MTL's NEW_HUC_AUTH instruction */
+#define PXP43_HUC_AUTH_INOUT_SIZE (SZ_4K)
+
+/* PXP-Input-Packet: HUC Load and Authentication */
struct pxp43_start_huc_auth_in {
struct pxp_cmd_header header;
__le64 huc_base_address;
} __packed;
-/* PXP-Output-Packet: HUC-Authentication */
-struct pxp43_start_huc_auth_out {
+/* PXP-Input-Packet: HUC Auth-only */
+struct pxp43_new_huc_auth_in {
+ struct pxp_cmd_header header;
+ u64 huc_base_address;
+ u32 huc_size;
+} __packed;
+
+/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */
+struct pxp43_huc_auth_out {
struct pxp_cmd_header header;
} __packed;
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
index 8dc41de3f6f7..f13890ec7db1 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp,
reply_size = header->message_size - sizeof(*header);
if (reply_size > msg_out_size_max) {
- drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
+ drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%zu)\n",
reply_size, msg_out_size_max);
reply_size = msg_out_size_max;
}
@@ -196,7 +196,7 @@ bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp)
* gsc-proxy init flow (the last set of dependencies that
* are out of order) will suffice.
*/
- if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc) &&
+ if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc, INTEL_HUC_AUTH_BY_GSC) &&
intel_gsc_uc_fw_proxy_init_done(&pxp->ctrl_gt->uc.gsc))
return true;
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c
index 23431c36b60b..5eedce916942 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c
@@ -19,7 +19,7 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp)
struct intel_gt *gt;
struct intel_huc *huc;
struct pxp43_start_huc_auth_in huc_in = {0};
- struct pxp43_start_huc_auth_out huc_out = {0};
+ struct pxp43_huc_auth_out huc_out = {0};
dma_addr_t huc_phys_addr;
u8 client_id = 0;
u8 fence_id = 0;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f31dfacde601..7000e5910a1d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -674,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
- * * 1 if HuC firmware is authenticated and running.
+ * * 1 if HuC firmware is loaded and fully authenticated,
+ * * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
@@ -3679,9 +3680,13 @@ struct drm_i915_gem_create_ext {
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
+ *
+ * For I915_GEM_CREATE_EXT_SET_PAT usage see
+ * struct drm_i915_gem_create_ext_set_pat.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions;
};
@@ -3796,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
};
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ * .pat_index = 0,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = PAGE_SIZE,
+ * .extensions = (uintptr_t)&set_pat_ext,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+ /**
+ * @pat_index: PAT index to be set
+ * PAT index is a bit field in Page Table Entry to control caching
+ * behaviors for GPU accesses. The definition of PAT index is
+ * platform dependent and can be found in hardware specifications,
+ */
+ __u32 pat_index;
+ /** @rsvd: reserved for future use */
+ __u32 rsvd;
+};
+
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf