author     Linus Torvalds <torvalds@linux-foundation.org>  2024-11-21 14:56:17 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-11-21 14:56:17 -0800
commit     28eb75e178d389d325f1666e422bc13bbbb9804c (patch)
tree       20417b4e798f98fc5687e80c1e0126afcf437c70 /drivers/gpu/drm/xe
parent     071b34dcf71523a559b6c39f5d21a268a9531b50 (diff)
parent     a163b895077861598be48c1cf7f4a88413c28b22 (diff)
Merge tag 'drm-next-2024-11-21' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
 "There's a lot of rework, the panic helper support is being added to
  more drivers, v3d gets support for HW superpages, scheduler
  documentation, drm client and video aperture reworks, some new
  MAINTAINERS added, amdgpu has the usual lots of IP refactors, Intel
  has some Pantherlake enablement and xe is getting some SRIOV bits,
  but just lots of stuff everywhere.

  core:
   - split DSC helpers from DP helpers
   - clang build fixes for drm/mm test
   - drop simple pipeline support for gem vram
   - document submission error signaling
   - move drm_rect to drm core module from kms helper
   - add default client setup to most drivers
   - move to video aperture helpers instead of drm ones

  tests:
   - new framebuffer tests

  ttm:
   - remove swapped and pinned BOs from TTM lru

  panic:
   - fix uninit spinlock
   - add ABGR2101010 support

  bridge:
   - add TI TDP158 support
   - use standard PM OPS

  dma-fence:
   - use read_trylock instead of read_lock to help lockdep

  scheduler:
   - add errno to sched start to report different errors
   - add locking to drm_sched_entity_modify_sched
   - improve documentation

  xe:
   - add drm_line_printer
   - lots of refactoring
   - Enable Xe2 + PES disaggregation
   - add new ARL PCI ID
   - SRIOV development work
   - fix exec unnecessary implicit fence
   - define and parse OA sync props
   - forcewake refactoring

  i915:
   - Enable BMG/LNL ultra joiner
   - Enable 10bpx + CCS scanout on ICL+, fp16/CCS on TGL+
   - use DSB for plane/color mgmt
   - Arrow lake PCI IDs
   - lots of i915/xe display refactoring
   - enable PXP GuC autoteardown
   - Pantherlake (PTL) Xe3 LPD display enablement
   - Allow fastset HDR infoframe changes
   - write DP source OUI for non-eDP sinks
   - share PCI IDs between i915 and xe

  amdgpu:
   - SDMA queue reset support
   - SMU 13.0.6, JPEG 4.0.3 updates
   - Initial runtime repartitioning support
   - rework IP structs for multiple IP instances
   - Fetch EDID from _DDC if available
   - SMU13 zero rpm user control
   - lots of fixes/cleanups

  amdkfd:
   - Increase event FIFO size
   - add topology cap flag for per queue reset

  msm:
   - DPU:
      - SA8775P support
      - (disabled by default) MSM8917, MSM8937, MSM8953 and MSM8996 support
      - Enable large framebuffer support
      - Drop MSM8998 and SDM845
   - DP:
      - SA8775P support
   - GPU:
      - a7xx preemption support
      - Adreno A663 support

  ast:
   - warn about unsupported TX chips

  ivpu:
   - add coredump
   - add pantherlake support

  rockchip:
   - 4K@60Hz display enablement
   - generate pll programming tables

  panthor:
   - add timestamp query API
   - add realtime group priority
   - add fdinfo support

  etnaviv:
   - improve handling of DMA address limits
   - improve GPU hangcheck

  exynos:
   - Decon Exynos7870 support

  mediatek:
   - add OF graph support

  omap:
   - locking fixes

  bochs:
   - convert to gem/shmem from simpledrm

  v3d:
   - support big/super pages
   - add gemfs

  vc4:
   - BCM2712 support refactoring
   - add YUV444 format support

  udmabuf:
   - folio related fixes

  nouveau:
   - add panic support on nv50+"

* tag 'drm-next-2024-11-21' of https://gitlab.freedesktop.org/drm/kernel: (1583 commits)
  drm/xe/guc: Fix dereference before NULL check
  drm/amd: Fix initialization mistake for NBIO 7.7.0
  Revert "drm/amd/display: parse umc_info or vram_info based on ASIC"
  drm/amd/display: Fix failure to read vram info due to static BP_RESULT
  drm/amdgpu: enable GTT fallback handling for dGPUs only
  drm/amd/amdgpu: limit single process inside MES
  drm/fourcc: add AMD_FMT_MOD_TILE_GFX9_4K_D_X
  drm/amdgpu/mes12: correct kiq unmap latency
  drm/amdgpu: Support vcn and jpeg error info parsing
  drm/amd : Update MES API header file for v11 & v12
  drm/amd/amdkfd: add/remove kfd queues on start/stop KFD scheduling
  drm/amdkfd: change kfd process kref count at creation
  drm/amdgpu: Cleanup shift coding style
  drm/amd/amdgpu: Increase MES log buffer to dump mes scratch data
  drm/amdgpu: Implement virt req_ras_err_count
  drm/amdgpu: VF Query RAS Caps from Host if supported
  drm/amdgpu: Add msg handlers for SRIOV RAS Telemetry
  drm/amdgpu: Update SRIOV Exchange Headers for RAS Telemetry Support
  drm/amd/display: 3.2.309
  drm/amd/display: Adjust VSDB parser for replay feature
  ...
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--  drivers/gpu/drm/xe/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/xe/Kconfig.debug | 12
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 9
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_abi.h | 8
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 61
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_capture_abi.h | 186
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 1
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_log_abi.h | 75
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h | 1
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h | 17
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h | 64
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h | 12
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h | 11
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 2
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h | 14
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 8
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h | 17
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h | 17
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h | 60
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h | 4
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h | 10
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h | 6
-rw-r--r--  drivers/gpu/drm/xe/display/ext/i915_irq.c | 33
-rw-r--r--  drivers/gpu/drm/xe/display/intel_bo.c | 84
-rw-r--r--  drivers/gpu/drm/xe/display/intel_fb_bo.c | 19
-rw-r--r--  drivers/gpu/drm/xe/display/intel_fb_bo.h | 24
-rw-r--r--  drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 12
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c | 71
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.h | 4
-rw-r--r--  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 9
-rw-r--r--  drivers/gpu/drm/xe/display/xe_fb_pin.c | 69
-rw-r--r--  drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 52
-rw-r--r--  drivers/gpu/drm/xe/display/xe_plane_initial.c | 4
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 69
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_guc_regs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_irq_regs.h | 82
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_reg_defs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_regs.h | 14
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo.c | 240
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs.c | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_assert.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 67
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_types.h | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 171
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump_types.h | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 131
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 62
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_client.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_execlist.c | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.c | 126
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.h | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake_types.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.c | 49
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 145
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_clock.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.c | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_freq.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.c | 137
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.c | 68
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_printk.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 56
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 239
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 44
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 132
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 419
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h | 24
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h | 40
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_throttle.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.c | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 88
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 167
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_capture.c | 1975
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_capture.h | 61
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_capture_types.h | 68
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 474
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct_types.h | 29
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_debugfs.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.c | 313
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log_types.h | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 84
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_relay.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 96
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.c | 307
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_types.h | 68
-rw-r--r--  drivers/gpu/drm/xe/xe_hwmon.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.c | 78
-rw-r--r--  drivers/gpu/drm/xe/xe_lmtt.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq.c | 203
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq_types.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.c | 139
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.h | 39
-rw-r--r--  drivers/gpu/drm/xe/xe_mocs.c | 31
-rw-r--r--  drivers/gpu/drm/xe/xe_oa.c | 726
-rw-r--r--  drivers/gpu/drm/xe/xe_oa_types.h | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_pat.c | 88
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 104
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_platform_types.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c | 93
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_sr.c | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job_types.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_tile.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_trace_bo.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_tuning.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_vram.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 58
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_wopcm.c | 15
153 files changed, 7297 insertions, 1990 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 116f58774135..b51a2bde73e2 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -8,12 +8,14 @@ config DRM_XE
select SHMEM
select TMPFS
select DRM_BUDDY
+ select DRM_CLIENT_SELECTION
select DRM_EXEC
select DRM_KMS_HELPER
select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n
select DRM_PANEL
select DRM_SUBALLOC_HELPER
select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
select DRM_DISPLAY_HDCP_HELPER
select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HELPER
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index bc177368af6c..2de0de41b8dd 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -40,9 +40,21 @@ config DRM_XE_DEBUG_VM
If in doubt, say "N".
+config DRM_XE_DEBUG_MEMIRQ
+ bool "Enable extra memirq debugging"
+ default n
+ help
+ Choose this option to enable additional debugging info for
+ memory based interrupts.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
config DRM_XE_DEBUG_SRIOV
bool "Enable extra SR-IOV debugging"
default n
+ select DRM_XE_DEBUG_MEMIRQ
help
Enable extra SR-IOV debugging info.
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index edfd812e0f41..bc7a04ce69fd 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -56,6 +56,7 @@ xe-y += xe_bb.o \
xe_gt_topology.o \
xe_guc.o \
xe_guc_ads.o \
+ xe_guc_capture.o \
xe_guc_ct.o \
xe_guc_db_mgr.o \
xe_guc_hwconfig.o \
@@ -129,6 +130,7 @@ xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
xe_gt_sriov_pf_config.o \
xe_gt_sriov_pf_control.o \
+ xe_gt_sriov_pf_migration.o \
xe_gt_sriov_pf_monitor.o \
xe_gt_sriov_pf_policy.o \
xe_gt_sriov_pf_service.o \
@@ -148,7 +150,6 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
-I$(src)/display/ext \
-I$(src)/compat-i915-headers \
-I$(srctree)/drivers/gpu/drm/i915/display/ \
- -Ddrm_i915_gem_object=xe_bo \
-Ddrm_i915_private=xe_device
# Rule to build SOC code shared with i915
@@ -165,6 +166,7 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/ext/i915_irq.o \
display/ext/i915_utils.o \
+ display/intel_bo.o \
display/intel_fb_bo.o \
display/intel_fbdev_fb.o \
display/xe_display.o \
@@ -180,7 +182,8 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
# SOC code shared with i915
xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-soc/intel_dram.o \
- i915-soc/intel_pch.o
+ i915-soc/intel_pch.o \
+ i915-soc/intel_rom.o
# Display code shared with i915
xe-$(CONFIG_DRM_XE_DISPLAY) += \
@@ -220,6 +223,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_dp_hdcp.o \
i915-display/intel_dp_link_training.o \
i915-display/intel_dp_mst.o \
+ i915-display/intel_dp_test.o \
i915-display/intel_dpll.o \
i915-display/intel_dpll_mgr.o \
i915-display/intel_dpt_common.o \
@@ -248,6 +252,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_modeset_setup.o \
i915-display/intel_modeset_verify.o \
i915-display/intel_panel.o \
+ i915-display/intel_pfit.o \
i915-display/intel_pmdemand.o \
i915-display/intel_pps.o \
i915-display/intel_psr.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 43ad4652c2b2..b54fe40fc5a9 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -176,6 +176,14 @@ enum xe_guc_sleep_state_status {
#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8)
+enum xe_guc_state_capture_event_status {
+ XE_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
+ XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
+};
+
+#define XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
+#define XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN 1
+
#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
/* Flush PPC or SMRO caches along with TLB invalidation request */
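For illustration, a minimal sketch of how a G2H handler might decode the new state-capture status field — this is not code from the series; it assumes the status occupies the first payload dword (as XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN == 1 suggests), and the handler name is hypothetical:

/*
 * Editor's sketch (not part of the patch): decoding the status dword of a
 * G2H state-capture notification.
 */
#include <linux/bitfield.h>
#include <linux/types.h>

static int handle_capture_notification(const u32 *payload, u32 len)
{
	u32 status;

	if (len < XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)
		return -EPROTO;

	status = FIELD_GET(XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK, payload[0]);
	if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
		return -ENOSPC;	/* GuC ran out of capture buffer space */

	return 0;	/* XE_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS */
}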
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index 181180f5945c..b6a1852749dd 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -557,4 +557,65 @@
#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn
#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96 GUC_HXG_REQUEST_MSG_n_DATAn
+/**
+ * DOC: PF2GUC_SAVE_RESTORE_VF
+ *
+ * This message is used by the PF to migrate VF info state maintained by the GuC.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ * Available since GuC version 70.25.0
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = **OPCODE** - operation to take: |
+ * | | | |
+ * | | | - _`GUC_PF_OPCODE_VF_SAVE` = 0 |
+ * | | | - _`GUC_PF_OPCODE_VF_RESTORE` = 1 |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_SAVE_RESTORE_VF` = 0x550B |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VFID** - VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **ADDR_LO** - lower 32-bits of GGTT offset to the buffer |
+ * | | | where the VF info will be save to or restored from. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | **ADDR_HI** - upper 32-bits of GGTT offset to the buffer |
+ * | | | where the VF info will be save to or restored from. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 4 | 27:0 | **SIZE** - size of the buffer (in dwords) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 31:28 | MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = **USED** - size of used buffer space (in dwords) |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_SAVE_RESTORE_VF 0x550Bu
+
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 4u)
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC_PF_OPCODE_VF_SAVE 0u
+#define GUC_PF_OPCODE_VF_RESTORE 1u
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE (0xfffffffu << 0)
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_MBZ (0xfu << 28)
+
+#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_0_USED GUC_HXG_RESPONSE_MSG_0_DATA0
+
#endif
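The DOC block above specifies the PF2GUC_SAVE_RESTORE_VF request layout: opcode and action in dword 0, the VFID, a 64-bit GGTT buffer address split across two dwords, and the buffer size in dwords. As a rough sketch only — not the implementation added elsewhere in this pull — here is one way such a request could be packed; GUC_HXG_MSG_0_ORIGIN/TYPE, GUC_HXG_ORIGIN_HOST, GUC_HXG_TYPE_REQUEST and GUC_HXG_REQUEST_MSG_0_ACTION are assumed to come from the existing HXG ABI headers:

/*
 * Editor's sketch: packing a PF2GUC_SAVE_RESTORE_VF request per the table
 * above.  The generic GUC_HXG_* field macros are assumed from
 * guc_messages_abi.h; this is not the function added by this series.
 */
#include <linux/bitfield.h>
#include <linux/kernel.h>

static void pack_save_restore_vf(u32 *msg, u32 opcode, u32 vfid,
				 u64 ggtt_addr, u32 ndwords)
{
	/* Dword 0: origin, type, opcode and action */
	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE, opcode) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_ACTION_PF2GUC_SAVE_RESTORE_VF);
	msg[1] = vfid;				/* Dword 1: VFID */
	msg[2] = lower_32_bits(ggtt_addr);	/* Dword 2: ADDR_LO */
	msg[3] = upper_32_bits(ggtt_addr);	/* Dword 3: ADDR_HI */
	msg[4] = FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE, ndwords);
}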
diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
new file mode 100644
index 000000000000..e7898edc6236
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_CAPTURE_ABI_H
+#define _ABI_GUC_CAPTURE_ABI_H
+
+#include <linux/types.h>
+
+/* Capture List Index */
+enum guc_capture_list_index_type {
+ GUC_CAPTURE_LIST_INDEX_PF = 0,
+ GUC_CAPTURE_LIST_INDEX_VF = 1,
+};
+
+#define GUC_CAPTURE_LIST_INDEX_MAX (GUC_CAPTURE_LIST_INDEX_VF + 1)
+
+/* Register-types of GuC capture register lists */
+enum guc_state_capture_type {
+ GUC_STATE_CAPTURE_TYPE_GLOBAL = 0,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE
+};
+
+#define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)
+
+/* Class indecies for capture_class and capture_instance arrays */
+enum guc_capture_list_class_type {
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
+ GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
+ GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE = 2,
+ GUC_CAPTURE_LIST_CLASS_BLITTER = 3,
+ GUC_CAPTURE_LIST_CLASS_GSC_OTHER = 4,
+};
+
+#define GUC_CAPTURE_LIST_CLASS_MAX (GUC_CAPTURE_LIST_CLASS_GSC_OTHER + 1)
+
+/**
+ * struct guc_mmio_reg - GuC MMIO reg state struct
+ *
+ * GuC MMIO reg state struct
+ */
+struct guc_mmio_reg {
+ /** @offset: MMIO Offset - filled in by Host */
+ u32 offset;
+ /** @value: MMIO Value - Used by Firmware to store value */
+ u32 value;
+ /** @flags: Flags for accessing the MMIO */
+ u32 flags;
+ /** @mask: Value of a mask to apply if mask with value is set */
+ u32 mask;
+#define GUC_REGSET_MASKED BIT(0)
+#define GUC_REGSET_STEERING_NEEDED BIT(1)
+#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
+#define GUC_REGSET_RESTORE_ONLY BIT(3)
+#define GUC_REGSET_STEERING_GROUP GENMASK(16, 12)
+#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20)
+} __packed;
+
+/**
+ * struct guc_mmio_reg_set - GuC register sets
+ *
+ * GuC register sets
+ */
+struct guc_mmio_reg_set {
+ /** @address: register address */
+ u32 address;
+ /** @count: register count */
+ u16 count;
+ /** @reserved: reserved */
+ u16 reserved;
+} __packed;
+
+/**
+ * struct guc_debug_capture_list_header - Debug capture list header.
+ *
+ * Debug capture list header.
+ */
+struct guc_debug_capture_list_header {
+ /** @info: contains number of MMIO descriptors in the capture list. */
+ u32 info;
+#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
+} __packed;
+
+/**
+ * struct guc_debug_capture_list - Debug capture list
+ *
+ * As part of ADS registration, these header structures (followed by
+ * an array of 'struct guc_mmio_reg' entries) are used to register with
+ * GuC microkernel the list of registers we want it to dump out prior
+ * to a engine reset.
+ */
+struct guc_debug_capture_list {
+ /** @header: Debug capture list header. */
+ struct guc_debug_capture_list_header header;
+ /** @regs: MMIO descriptors in the capture list. */
+ struct guc_mmio_reg regs[];
+} __packed;
+
+/**
+ * struct guc_state_capture_header_t - State capture header.
+ *
+ * Prior to resetting engines that have hung or faulted, GuC microkernel
+ * reports the engine error-state (register values that was read) by
+ * logging them into the shared GuC log buffer using these hierarchy
+ * of structures.
+ */
+struct guc_state_capture_header_t {
+ /**
+ * @owner: VFID
+ * BR[ 7: 0] MBZ when SRIOV is disabled. When SRIOV is enabled
+ * VFID is an integer in range [0, 63] where 0 means the state capture
+ * is corresponding to the PF and an integer N in range [1, 63] means
+ * the state capture is for VF N.
+ */
+ u32 owner;
+#define GUC_STATE_CAPTURE_HEADER_VFID GENMASK(7, 0)
+ /** @info: Engine class/instance and capture type info */
+ u32 info;
+#define GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE GENMASK(3, 0) /* see guc_state_capture_type */
+#define GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS GENMASK(7, 4) /* see guc_capture_list_class_type */
+#define GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE GENMASK(11, 8)
+ /**
+ * @lrca: logical ring context address.
+ * if type-instance, LRCA (address) that hung, else set to ~0
+ */
+ u32 lrca;
+ /**
+ * @guc_id: context_index.
+ * if type-instance, context index of hung context, else set to ~0
+ */
+ u32 guc_id;
+ /** @num_mmio_entries: Number of captured MMIO entries. */
+ u32 num_mmio_entries;
+#define GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES GENMASK(9, 0)
+} __packed;
+
+/**
+ * struct guc_state_capture_t - State capture.
+ *
+ * State capture
+ */
+struct guc_state_capture_t {
+ /** @header: State capture header. */
+ struct guc_state_capture_header_t header;
+ /** @mmio_entries: Array of captured guc_mmio_reg entries. */
+ struct guc_mmio_reg mmio_entries[];
+} __packed;
+
+/* State Capture Group Type */
+enum guc_state_capture_group_type {
+ GUC_STATE_CAPTURE_GROUP_TYPE_FULL = 0,
+ GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL
+};
+
+#define GUC_STATE_CAPTURE_GROUP_TYPE_MAX (GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL + 1)
+
+/**
+ * struct guc_state_capture_group_header_t - State capture group header
+ *
+ * State capture group header.
+ */
+struct guc_state_capture_group_header_t {
+ /** @owner: VFID */
+ u32 owner;
+#define GUC_STATE_CAPTURE_GROUP_HEADER_VFID GENMASK(7, 0)
+ /** @info: Engine class/instance and capture type info */
+ u32 info;
+#define GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES GENMASK(7, 0)
+#define GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE GENMASK(15, 8)
+} __packed;
+
+/**
+ * struct guc_state_capture_group_t - State capture group.
+ *
+ * this is the top level structure where an error-capture dump starts
+ */
+struct guc_state_capture_group_t {
+ /** @grp_header: State capture group header. */
+ struct guc_state_capture_group_header_t grp_header;
+ /** @capture_entries: Array of state captures */
+ struct guc_state_capture_t capture_entries[];
+} __packed;
+
+#endif
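The structures above nest as group header, then capture entries, each being a header followed by num_mmio_entries register/value pairs. A hedged sketch of decoding a single entry with the field masks defined in this header; the function and its print calls are illustrative only, not code from the patch:

/*
 * Editor's sketch: walking one guc_state_capture_t entry using the field
 * masks defined above.
 */
#include <linux/bitfield.h>
#include <linux/printk.h>

static void dump_capture_entry(const struct guc_state_capture_t *cap)
{
	u32 vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, cap->header.owner);
	u32 type = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, cap->header.info);
	u32 eclass = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS, cap->header.info);
	u32 einst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE, cap->header.info);
	u32 count = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
			      cap->header.num_mmio_entries);
	u32 i;

	pr_info("capture: vf=%u type=%u class=%u instance=%u regs=%u\n",
		vfid, type, eclass, einst, count);

	for (i = 0; i < count; i++)
		pr_info("  reg 0x%08x = 0x%08x\n",
			cap->mmio_entries[i].offset,
			cap->mmio_entries[i].value);
}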
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 8f86a16dc577..f58198cf2cf6 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -52,6 +52,7 @@ struct guc_ct_buffer_desc {
#define GUC_CTB_STATUS_OVERFLOW (1 << 0)
#define GUC_CTB_STATUS_UNDERFLOW (1 << 1)
#define GUC_CTB_STATUS_MISMATCH (1 << 2)
+#define GUC_CTB_STATUS_DISABLED (1 << 3)
u32 reserved[13];
} __packed;
static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 6b30743a2f6c..37606cf8cc5e 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -352,6 +352,7 @@ enum xe_guc_klv_ids {
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a,
};
#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_log_abi.h b/drivers/gpu/drm/xe/abi/guc_log_abi.h
new file mode 100644
index 000000000000..554630b7ccd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_log_abi.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_LOG_ABI_H
+#define _ABI_GUC_LOG_ABI_H
+
+#include <linux/types.h>
+
+/* GuC logging buffer types */
+enum guc_log_buffer_type {
+ GUC_LOG_BUFFER_CRASH_DUMP,
+ GUC_LOG_BUFFER_DEBUG,
+ GUC_LOG_BUFFER_CAPTURE,
+};
+
+#define GUC_LOG_BUFFER_TYPE_MAX 3
+
+/**
+ * struct guc_log_buffer_state - GuC log buffer state
+ *
+ * Below state structure is used for coordination of retrieval of GuC firmware
+ * logs. Separate state is maintained for each log buffer type.
+ * read_ptr points to the location where Xe read last in log buffer and
+ * is read only for GuC firmware. write_ptr is incremented by GuC with number
+ * of bytes written for each log entry and is read only for Xe.
+ * When any type of log buffer becomes half full, GuC sends a flush interrupt.
+ * GuC firmware expects that while it is writing to 2nd half of the buffer,
+ * first half would get consumed by Host and then get a flush completed
+ * acknowledgment from Host, so that it does not end up doing any overwrite
+ * causing loss of logs. So when buffer gets half filled & Xe has requested
+ * for interrupt, GuC will set flush_to_file field, set the sampled_write_ptr
+ * to the value of write_ptr and raise the interrupt.
+ * On receiving the interrupt Xe should read the buffer, clear flush_to_file
+ * field and also update read_ptr with the value of sample_write_ptr, before
+ * sending an acknowledgment to GuC. marker & version fields are for internal
+ * usage of GuC and opaque to Xe. buffer_full_cnt field is incremented every
+ * time GuC detects the log buffer overflow.
+ */
+struct guc_log_buffer_state {
+ /** @marker: buffer state start marker */
+ u32 marker[2];
+ /** @read_ptr: the last byte offset that was read by KMD previously */
+ u32 read_ptr;
+ /**
+ * @write_ptr: the next byte offset location that will be written by
+ * GuC
+ */
+ u32 write_ptr;
+ /** @size: Log buffer size */
+ u32 size;
+ /**
+ * @sampled_write_ptr: Log buffer write pointer
+ * This is written by GuC to the byte offset of the next free entry in
+ * the buffer on log buffer half full or state capture notification
+ */
+ u32 sampled_write_ptr;
+ /**
+ * @wrap_offset: wraparound offset
+ * This is the byte offset of location 1 byte after last valid guc log
+ * event entry written by Guc firmware before there was a wraparound.
+ * This field is updated by guc firmware and should be used by Host
+ * when copying buffer contents to file.
+ */
+ u32 wrap_offset;
+ /** @flags: Flush to file flag and buffer full count */
+ u32 flags;
+#define GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE GENMASK(0, 0)
+#define GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT GENMASK(4, 1)
+ /** @version: The Guc-Log-Entry format version */
+ u32 version;
+} __packed;
+
+#endif
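The comment on guc_log_buffer_state describes the flush handshake: on a half-full or capture notification the host copies everything between read_ptr and sampled_write_ptr, clears flush_to_file, advances read_ptr, and only then acknowledges. A hedged sketch of that host-side step, assuming 'log' points at the buffer's data area and using a hypothetical copy_log_chunk() helper:

/*
 * Editor's sketch of the host side of the flush protocol described above.
 * copy_log_chunk() is a hypothetical helper, not a real Xe function.
 */
#include <linux/bitfield.h>

static void copy_log_chunk(const void *src, u32 len);	/* hypothetical */

static void consume_log_buffer(struct guc_log_buffer_state *state,
			       const void *log)
{
	u32 read = state->read_ptr;
	u32 write = state->sampled_write_ptr;	/* snapshot taken by GuC */

	if (write >= read) {
		copy_log_chunk(log + read, write - read);
	} else {
		/* Buffer wrapped: valid data ends at wrap_offset */
		copy_log_chunk(log + read, state->wrap_offset - read);
		copy_log_chunk(log, write);
	}

	state->read_ptr = write;
	state->flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
	/* ...then send the flush-complete acknowledgment back to GuC */
}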
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
deleted file mode 100644
index 710cecca972d..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
+++ /dev/null
@@ -1 +0,0 @@
-/* Empty */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
deleted file mode 100644
index 650ea2803a97..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _I915_GEM_MMAN_H_
-#define _I915_GEM_MMAN_H_
-
-#include "xe_bo_types.h"
-#include <drm/drm_prime.h>
-
-static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma)
-{
- return drm_gem_prime_mmap(&bo->ttm.base, vma);
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
deleted file mode 100644
index 777c20ceabab..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _I915_GEM_OBJECT_H_
-#define _I915_GEM_OBJECT_H_
-
-#include <linux/types.h>
-
-#include "xe_bo.h"
-
-#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */
-
-static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
-{
- /* Should never be called */
- WARN_ON(1);
- return n;
-}
-
-static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo)
-{
- /* legacy tiling is unused */
- return false;
-}
-
-static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo)
-{
- /* legacy tiling is unused */
- return false;
-}
-
-static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
- u32 ofs, u64 *ptr, u32 size)
-{
- struct ttm_bo_kmap_obj map;
- void *src;
- bool is_iomem;
- int ret;
-
- ret = xe_bo_lock(bo, true);
- if (ret)
- return ret;
-
- ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map);
- if (ret)
- goto out_unlock;
-
- ofs &= ~PAGE_MASK;
- src = ttm_kmap_obj_virtual(&map, &is_iomem);
- src += ofs;
- if (is_iomem)
- memcpy_fromio(ptr, (void __iomem *)src, size);
- else
- memcpy(ptr, src, size);
-
- ttm_bo_kunmap(&map);
-out_unlock:
- xe_bo_unlock(bo);
- return ret;
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
deleted file mode 100644
index 2a3f12d2978c..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_
-#define _I915_GEM_OBJECT_FRONTBUFFER_H_
-
-#define i915_gem_object_get_frontbuffer(obj) NULL
-#define i915_gem_object_set_frontbuffer(obj, front) (front)
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h
deleted file mode 100644
index 7d6bb1abab73..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/* Copyright © 2024 Intel Corporation */
-
-#ifndef __I915_GEM_OBJECT_TYPES_H__
-#define __I915_GEM_OBJECT_TYPES_H__
-
-#include "xe_bo.h"
-
-#define to_intel_bo(x) gem_to_xe_bo((x))
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index cb6c7598824b..9c4cf050059a 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -29,7 +29,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
NULL, size, start, end,
- ttm_bo_type_kernel, flags);
+ ttm_bo_type_kernel, flags, 0);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
bo = NULL;
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
deleted file mode 100644
index b4c47617b64b..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef __I915_DEBUGFS_H__
-#define __I915_DEBUGFS_H__
-
-struct drm_i915_gem_object;
-struct seq_file;
-
-static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {}
-
-#endif /* __I915_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index f27a2c75b56d..84b0991b35b3 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -14,6 +14,7 @@
#include "i915_utils.h"
#include "intel_runtime_pm.h"
+#include "xe_device.h" /* for xe_device_has_flat_ccs() */
#include "xe_device_types.h"
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -66,19 +67,14 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE)
+#define IS_PANTHERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_PANTHERLAKE)
#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0)
-#define IP_VER(ver, rel) ((ver) << 8 | (rel))
-
#define IS_MOBILE(xe) (xe && 0)
-#define IS_LP(xe) ((xe) && 0)
-#define IS_GEN9_LP(xe) ((xe) && 0)
-#define IS_GEN9_BC(xe) ((xe) && 0)
-
#define IS_TIGERLAKE_UY(xe) (xe && 0)
#define IS_COMETLAKE_ULX(xe) (xe && 0)
#define IS_COFFEELAKE_ULX(xe) (xe && 0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
deleted file mode 100644
index 98e9dd78f670..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _I915_GPU_ERROR_H_
-#define _I915_GPU_ERROR_H_
-
-struct drm_i915_error_state_buf;
-
-__printf(2, 3)
-static inline void
-i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
-{
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
index 8c7b315aa8ac..274042bff1be 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
@@ -20,18 +20,26 @@ static inline void enable_rpm_wakeref_asserts(void *rpm)
{
}
+static inline bool
+intel_runtime_pm_suspended(struct xe_runtime_pm *pm)
+{
+ struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+ return pm_runtime_suspended(xe->drm.dev);
+}
+
static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
- return xe_pm_runtime_resume_and_get(xe);
+ return xe_pm_runtime_resume_and_get(xe) ? INTEL_WAKEREF_DEF : NULL;
}
static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
- return xe_pm_runtime_get_if_in_use(xe);
+ return xe_pm_runtime_get_if_in_use(xe) ? INTEL_WAKEREF_DEF : NULL;
}
static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm)
@@ -39,7 +47,8 @@ static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
xe_pm_runtime_get_noresume(xe);
- return true;
+
+ return INTEL_WAKEREF_DEF;
}
static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
@@ -62,6 +71,6 @@ static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_
#define with_intel_runtime_pm(rpm, wf) \
for ((wf) = intel_runtime_pm_get(rpm); (wf); \
- intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+ intel_runtime_pm_put((rpm), (wf)), (wf) = NULL)
#endif
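With intel_wakeref_t becoming a pointer (see the intel_wakeref.h hunk further down), these compat wrappers now return INTEL_WAKEREF_DEF or NULL, and with_intel_runtime_pm() keys its loop off that pointer. A minimal usage sketch; the surrounding function is hypothetical:

/*
 * Editor's sketch: typical use of the compat wrappers above.  A NULL return
 * from intel_runtime_pm_get_if_in_use() means the device was not awake.
 */
static void poke_hw_if_awake(struct xe_runtime_pm *rpm)
{
	intel_wakeref_t wf = intel_runtime_pm_get_if_in_use(rpm);

	if (!wf)
		return;		/* device suspended; nothing to do */

	/* ... MMIO access is safe here ... */

	intel_runtime_pm_put(rpm, wf);
}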
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index eb5b5f0e4bd9..0382beb4035b 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -10,11 +10,11 @@
#include "xe_device_types.h"
#include "xe_mmio.h"
-static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore)
+static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncore)
{
struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
- return xe_root_mmio_gt(xe);
+ return xe_root_tile_mmio(xe);
}
static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore)
@@ -29,7 +29,7 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
@@ -37,7 +37,7 @@ static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read8(__compat_uncore_to_mmio(uncore), reg);
}
static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
@@ -45,7 +45,7 @@ static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read16(__compat_uncore_to_mmio(uncore), reg);
}
static inline u64
@@ -57,11 +57,11 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore,
u32 upper, lower, old_upper;
int loop = 0;
- upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+ upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg);
do {
old_upper = upper;
- lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg);
- upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+ lower = xe_mmio_read32(__compat_uncore_to_mmio(uncore), lower_reg);
+ upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg);
} while (upper != old_upper && loop++ < 2);
return (u64)upper << 32 | lower;
@@ -72,7 +72,7 @@ static inline void intel_uncore_posting_read(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write(struct intel_uncore *uncore,
@@ -80,7 +80,7 @@ static inline void intel_uncore_write(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
@@ -88,7 +88,7 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set);
+ return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set);
}
static inline int intel_wait_for_register(struct intel_uncore *uncore,
@@ -97,7 +97,7 @@ static inline int intel_wait_for_register(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
timeout * USEC_PER_MSEC, NULL, false);
}
@@ -107,7 +107,7 @@ static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
timeout * USEC_PER_MSEC, NULL, false);
}
@@ -118,7 +118,7 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
fast_timeout_us + 1000 * slow_timeout_ms,
out_value, false);
}
@@ -128,7 +128,7 @@ static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
@@ -136,7 +136,7 @@ static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
@@ -144,7 +144,7 @@ static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
@@ -152,33 +152,9 @@ static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
-static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore)
-{
- struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
-
- return xe_device_get_root_tile(xe)->mmio.regs;
-}
-
-/*
- * The raw_reg_{read,write} macros are intended as a micro-optimization for
- * interrupt handlers so that the pointer indirection on uncore->regs can
- * be computed once (and presumably cached in a register) instead of generating
- * extra load instructions for each MMIO access.
- *
- * Given that these macros are only intended for non-GSI interrupt registers
- * (and the goal is to avoid extra instructions generated by the compiler),
- * these macros do not account for uncore->gsi_offset. Any caller that needs
- * to use these macros on a GSI register is responsible for adding the
- * appropriate GSI offset to the 'base' parameter.
- */
-#define raw_reg_read(base, reg) \
- readl(base + i915_mmio_reg_offset(reg))
-#define raw_reg_write(base, reg, value) \
- writel(value, base + i915_mmio_reg_offset(reg))
-
#define intel_uncore_forcewake_get(x, y) do { } while (0)
#define intel_uncore_forcewake_put(x, y) do { } while (0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
index ecb1c0707706..2a32faea9db5 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
@@ -5,4 +5,6 @@
#include <linux/types.h>
-typedef unsigned long intel_wakeref_t;
+typedef struct ref_tracker *intel_wakeref_t;
+
+#define INTEL_WAKEREF_DEF ERR_PTR(-ENOENT)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
index c2c30ece8f77..5dfc587c8237 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
@@ -9,20 +9,14 @@
#include <linux/errno.h>
#include <linux/types.h>
-struct drm_i915_gem_object;
+struct drm_gem_object;
struct intel_pxp;
static inline int intel_pxp_key_check(struct intel_pxp *pxp,
- struct drm_i915_gem_object *obj,
+ struct drm_gem_object *obj,
bool assign)
{
return -ENODEV;
}
-static inline bool
-i915_gem_object_is_protected(const struct drm_i915_gem_object *obj)
-{
- return false;
-}
-
#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h
new file mode 100644
index 000000000000..05cbfb697b2b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_rom.h"
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
index eb40f1cb44f6..a7dbc6554d69 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_irq.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -7,25 +7,24 @@
#include "i915_reg.h"
#include "intel_uncore.h"
-void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
- i915_reg_t iir, i915_reg_t ier)
+void gen2_irq_reset(struct intel_uncore *uncore, struct i915_irq_regs regs)
{
- intel_uncore_write(uncore, imr, 0xffffffff);
- intel_uncore_posting_read(uncore, imr);
+ intel_uncore_write(uncore, regs.imr, 0xffffffff);
+ intel_uncore_posting_read(uncore, regs.imr);
- intel_uncore_write(uncore, ier, 0);
+ intel_uncore_write(uncore, regs.ier, 0);
/* IIR can theoretically queue up two events. Be paranoid. */
- intel_uncore_write(uncore, iir, 0xffffffff);
- intel_uncore_posting_read(uncore, iir);
- intel_uncore_write(uncore, iir, 0xffffffff);
- intel_uncore_posting_read(uncore, iir);
+ intel_uncore_write(uncore, regs.iir, 0xffffffff);
+ intel_uncore_posting_read(uncore, regs.iir);
+ intel_uncore_write(uncore, regs.iir, 0xffffffff);
+ intel_uncore_posting_read(uncore, regs.iir);
}
/*
* We should clear IMR at preinstall/uninstall, and just check at postinstall.
*/
-void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
+void gen2_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
{
struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
u32 val = intel_uncore_read(uncore, reg);
@@ -42,16 +41,14 @@ void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
intel_uncore_posting_read(uncore, reg);
}
-void gen3_irq_init(struct intel_uncore *uncore,
- i915_reg_t imr, u32 imr_val,
- i915_reg_t ier, u32 ier_val,
- i915_reg_t iir)
+void gen2_irq_init(struct intel_uncore *uncore, struct i915_irq_regs regs,
+ u32 imr_val, u32 ier_val)
{
- gen3_assert_iir_is_zero(uncore, iir);
+ gen2_assert_iir_is_zero(uncore, regs.iir);
- intel_uncore_write(uncore, ier, ier_val);
- intel_uncore_write(uncore, imr, imr_val);
- intel_uncore_posting_read(uncore, imr);
+ intel_uncore_write(uncore, regs.ier, ier_val);
+ intel_uncore_write(uncore, regs.imr, imr_val);
+ intel_uncore_posting_read(uncore, regs.imr);
}
bool intel_irqs_enabled(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c
new file mode 100644
index 000000000000..9f54fad0f1c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_bo.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2024 Intel Corporation */
+
+#include <drm/drm_gem.h>
+
+#include "xe_bo.h"
+#include "intel_bo.h"
+
+bool intel_bo_is_tiled(struct drm_gem_object *obj)
+{
+ /* legacy tiling is unused */
+ return false;
+}
+
+bool intel_bo_is_userptr(struct drm_gem_object *obj)
+{
+ /* xe does not have userptr bos */
+ return false;
+}
+
+bool intel_bo_is_shmem(struct drm_gem_object *obj)
+{
+ return false;
+}
+
+bool intel_bo_is_protected(struct drm_gem_object *obj)
+{
+ return false;
+}
+
+void intel_bo_flush_if_display(struct drm_gem_object *obj)
+{
+}
+
+int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ return drm_gem_prime_mmap(obj, vma);
+}
+
+int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size)
+{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct ttm_bo_kmap_obj map;
+ void *src;
+ bool is_iomem;
+ int ret;
+
+ ret = xe_bo_lock(bo, true);
+ if (ret)
+ return ret;
+
+ ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map);
+ if (ret)
+ goto out_unlock;
+
+ offset &= ~PAGE_MASK;
+ src = ttm_kmap_obj_virtual(&map, &is_iomem);
+ src += offset;
+ if (is_iomem)
+ memcpy_fromio(dst, (void __iomem *)src, size);
+ else
+ memcpy(dst, src, size);
+
+ ttm_bo_kunmap(&map);
+out_unlock:
+ xe_bo_unlock(bo);
+ return ret;
+}
+
+struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj)
+{
+ return NULL;
+}
+
+struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj,
+ struct intel_frontbuffer *front)
+{
+ return front;
+}
+
+void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj)
+{
+ /* FIXME */
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index 63ce97cc4cfe..4d209ebc26c2 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -11,8 +11,10 @@
#include "intel_fb_bo.h"
#include "xe_bo.h"
-void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
+void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj)
{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
if (bo->flags & XE_BO_FLAG_PINNED) {
/* Unpin our kernel fb first */
xe_bo_lock(bo, false);
@@ -23,9 +25,10 @@ void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
}
int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
- struct xe_bo *bo,
+ struct drm_gem_object *obj,
struct drm_mode_fb_cmd2 *mode_cmd)
{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = to_xe_device(bo->ttm.base.dev);
int ret;
@@ -65,11 +68,11 @@ err:
return ret;
}
-struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
- struct drm_file *filp,
- const struct drm_mode_fb_cmd2 *mode_cmd)
+struct drm_gem_object *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+ struct drm_file *filp,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
{
- struct drm_i915_gem_object *bo;
+ struct xe_bo *bo;
struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
if (!gem)
@@ -78,11 +81,11 @@ struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
bo = gem_to_xe_bo(gem);
/* Require vram placement or dma-buf import */
if (IS_DGFX(i915) &&
- !xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_VRAM0) &&
+ !xe_bo_can_migrate(bo, XE_PL_VRAM0) &&
bo->ttm.type != ttm_bo_type_sg) {
drm_gem_object_put(gem);
return ERR_PTR(-EREMOTE);
}
- return bo;
+ return gem;
}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h
deleted file mode 100644
index 5d365b925b7a..000000000000
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#ifndef __INTEL_FB_BO_H__
-#define __INTEL_FB_BO_H__
-
-struct drm_file;
-struct drm_mode_fb_cmd2;
-struct drm_i915_private;
-struct intel_framebuffer;
-struct xe_bo;
-
-void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
-int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
- struct xe_bo *bo,
- struct drm_mode_fb_cmd2 *mode_cmd);
-
-struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
- struct drm_file *filp,
- const struct drm_mode_fb_cmd2 *mode_cmd);
-
-#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 99499d6c0256..ca95fcd098ec 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -6,6 +6,7 @@
#include <drm/drm_fb_helper.h>
#include "intel_display_types.h"
+#include "intel_fb.h"
#include "intel_fbdev_fb.h"
#include "xe_bo.h"
#include "xe_ttm_stolen_mgr.h"
@@ -20,7 +21,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
struct drm_device *dev = helper->dev;
struct xe_device *xe = to_xe_device(dev);
struct drm_mode_fb_cmd2 mode_cmd = {};
- struct drm_i915_gem_object *obj;
+ struct xe_bo *obj;
int size;
/* we don't do packed 24bpp */
@@ -64,13 +65,13 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
goto err;
}
- fb = intel_framebuffer_create(obj, &mode_cmd);
+ fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd);
if (IS_ERR(fb)) {
xe_bo_unpin_map_no_vm(obj);
goto err;
}
- drm_gem_object_put(intel_bo_to_drm_bo(obj));
+ drm_gem_object_put(&obj->ttm.base);
return to_intel_framebuffer(fb);
@@ -79,8 +80,9 @@ err:
}
int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
- struct drm_i915_gem_object *obj, struct i915_vma *vma)
+ struct drm_gem_object *_obj, struct i915_vma *vma)
{
+ struct xe_bo *obj = gem_to_xe_bo(_obj);
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
if (!(obj->flags & XE_BO_FLAG_SYSTEM)) {
@@ -100,7 +102,7 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info
XE_WARN_ON(iosys_map_is_null(&obj->vmap));
info->screen_base = obj->vmap.vaddr_iomem;
- info->screen_size = intel_bo_to_drm_bo(obj)->size;
+ info->screen_size = obj->ttm.base.size;
return 0;
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index c6e0c8d77a70..b5502f335f53 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -4,16 +4,16 @@
*/
#include "xe_display.h"
-#include "regs/xe_regs.h"
+#include "regs/xe_irq_regs.h"
#include <linux/fb.h>
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_probe_helper.h>
#include <uapi/drm/xe_drm.h>
#include "soc/intel_dram.h"
-#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */
#include "intel_acpi.h"
#include "intel_audio.h"
#include "intel_bw.h"
@@ -34,7 +34,7 @@
static bool has_display(struct xe_device *xe)
{
- return HAS_DISPLAY(xe);
+ return HAS_DISPLAY(&xe->display);
}
/**
@@ -202,12 +202,14 @@ int xe_display_init(struct xe_device *xe)
void xe_display_fini(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
intel_hpd_poll_fini(xe);
- intel_hdcp_component_fini(xe);
+ intel_hdcp_component_fini(display);
intel_audio_deinit(xe);
}
@@ -321,7 +323,9 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
* properly.
*/
intel_power_domains_disable(xe);
- intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (!runtime)
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+
if (!runtime && has_display(xe)) {
drm_kms_helper_poll_disable(&xe->drm);
intel_display_driver_disable_user_access(xe);
@@ -330,7 +334,8 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
xe_display_flush_cleanup_work(xe);
- intel_dp_mst_suspend(xe);
+ if (!runtime)
+ intel_dp_mst_suspend(xe);
intel_hpd_cancel_work(xe);
@@ -341,7 +346,7 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
- intel_dmc_suspend(xe);
+ intel_dmc_suspend(display);
if (runtime && has_display(xe))
intel_hpd_poll_enable(xe);
@@ -352,6 +357,36 @@ void xe_display_pm_suspend(struct xe_device *xe)
__xe_display_pm_suspend(xe, false);
}
+void xe_display_pm_shutdown(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_disable(xe);
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+ intel_dp_mst_suspend(xe);
+ intel_hpd_cancel_work(xe);
+
+ if (has_display(xe))
+ intel_display_driver_suspend_access(xe);
+
+ intel_encoder_suspend_all(display);
+ intel_encoder_shutdown_all(display);
+
+ intel_opregion_suspend(display, PCI_D3cold);
+
+ intel_dmc_suspend(display);
+}
+
void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
if (!xe->info.probe_display)
@@ -376,6 +411,19 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
intel_display_power_suspend_late(xe);
}
+void xe_display_pm_shutdown_late(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ /*
+ * The only requirement is to reboot with display DC states disabled,
+ * for now leaving all display power wells in the INIT power domain
+ * enabled.
+ */
+ intel_power_domains_driver_remove(xe);
+}
+
void xe_display_pm_resume_early(struct xe_device *xe)
{
if (!xe->info.probe_display)
@@ -393,7 +441,7 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
if (!xe->info.probe_display)
return;
- intel_dmc_resume(xe);
+ intel_dmc_resume(display);
if (has_display(xe))
drm_mode_config_reset(&xe->drm);
@@ -405,7 +453,9 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_display_driver_resume_access(xe);
/* MST sideband requires HPD interrupts enabled */
- intel_dp_mst_resume(xe);
+ if (!runtime)
+ intel_dp_mst_resume(xe);
+
if (!runtime && has_display(xe)) {
intel_display_driver_resume(xe);
drm_kms_helper_poll_enable(&xe->drm);
@@ -417,7 +467,8 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_opregion_resume(display);
- intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+ if (!runtime)
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
intel_power_domains_enable(xe);
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index bed55fd26f30..17afa537aee5 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -35,7 +35,9 @@ void xe_display_irq_reset(struct xe_device *xe);
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
void xe_display_pm_suspend(struct xe_device *xe);
+void xe_display_pm_shutdown(struct xe_device *xe);
void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_shutdown_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
@@ -66,7 +68,9 @@ static inline void xe_display_irq_reset(struct xe_device *xe) {}
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown(struct xe_device *xe) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f99d901a3214..f95375451e2f 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
if (!vma)
return false;
+ /* Set scanout flag for WC mapping */
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
NULL, PAGE_ALIGN(size),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
if (IS_ERR(obj)) {
kfree(vma);
return false;
@@ -73,5 +74,9 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
- /* TODO: add xe specific flush_map() for dsb buffer object. */
+ /*
+ * The memory barrier here is to ensure coherency of DSB vs MMIO,
+ * both for weak ordering archs and discrete cards.
+ */
+ xe_device_wmb(dsb_buf->vma->bo->tile->xe);
}
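The flush now maps to a real barrier: DSB commands are written through a write-combined CPU mapping, so they must be made globally visible before the MMIO write that starts DSB execution. A minimal sketch of that ordering, assuming only helpers already used in this series (iosys_map_wr(), xe_device_wmb()); dsb_start_mmio_write() is a hypothetical stand-in for the register write that kicks off execution:

	/* Sketch: order WC buffer writes against the MMIO kick. */
	static void dsb_write_and_kick(struct intel_dsb_buffer *dsb_buf, u32 cmd)
	{
		struct xe_device *xe = dsb_buf->vma->bo->tile->xe;

		/* CPU stores land in a write-combined mapping... */
		iosys_map_wr(&dsb_buf->vma->bo->vmap, 0, u32, cmd);

		/* ...so fence them before telling the hardware to fetch. */
		xe_device_wmb(xe);

		dsb_start_mmio_write(xe);	/* hypothetical doorbell write */
	}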
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index b58fc4ba2aac..761510ae0690 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -79,12 +79,14 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
- struct i915_vma *vma)
+ struct i915_vma *vma,
+ u64 physical_alignment)
{
struct xe_device *xe = to_xe_device(fb->base.dev);
struct xe_tile *tile0 = xe_device_get_root_tile(xe);
struct xe_ggtt *ggtt = tile0->mem.ggtt;
- struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt;
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+ struct xe_bo *bo = gem_to_xe_bo(obj), *dpt;
u32 dpt_size, size = bo->ttm.base.size;
if (view->type == I915_GTT_VIEW_NORMAL)
@@ -98,23 +100,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_PAGE_SIZE);
if (IS_DGFX(xe))
- dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM0 |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE);
+ dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ physical_alignment);
else
- dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE);
+ dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ physical_alignment);
if (IS_ERR(dpt))
- dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE);
+ dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ physical_alignment);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
@@ -183,9 +191,11 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
- struct i915_vma *vma)
+ struct i915_vma *vma,
+ u64 physical_alignment)
{
- struct xe_bo *bo = intel_fb_obj(&fb->base);
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+ struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = to_xe_device(fb->base.dev);
struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
u32 align;
@@ -264,12 +274,14 @@ out:
}
static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
- const struct i915_gtt_view *view)
+ const struct i915_gtt_view *view,
+ u64 physical_alignment)
{
struct drm_device *dev = fb->base.dev;
struct xe_device *xe = to_xe_device(dev);
struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
- struct xe_bo *bo = intel_fb_obj(&fb->base);
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+ struct xe_bo *bo = gem_to_xe_bo(obj);
int ret;
if (!vma)
@@ -312,9 +324,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
vma->bo = bo;
if (intel_fb_uses_dpt(&fb->base))
- ret = __xe_pin_fb_vma_dpt(fb, view, vma);
+ ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment);
else
- ret = __xe_pin_fb_vma_ggtt(fb, view, vma);
+ ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment);
if (ret)
goto err_unpin;
@@ -355,7 +367,7 @@ intel_fb_pin_to_ggtt(const struct drm_framebuffer *fb,
{
*out_flags = 0;
- return __xe_pin_fb_vma(to_intel_framebuffer(fb), view);
+ return __xe_pin_fb_vma(to_intel_framebuffer(fb), view, phys_alignment);
}
void intel_fb_unpin_vma(struct i915_vma *vma, unsigned long flags)
@@ -366,13 +378,18 @@ void intel_fb_unpin_vma(struct i915_vma *vma, unsigned long flags)
int intel_plane_pin_fb(struct intel_plane_state *plane_state)
{
struct drm_framebuffer *fb = plane_state->hw.fb;
- struct xe_bo *bo = intel_fb_obj(fb);
+ struct drm_gem_object *obj = intel_fb_bo(fb);
+ struct xe_bo *bo = gem_to_xe_bo(obj);
struct i915_vma *vma;
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+ u64 phys_alignment = plane->min_alignment(plane, fb, 0);
/* We reject creating !SCANOUT fb's, so this is weird.. */
drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));
- vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
+ vma = __xe_pin_fb_vma(intel_fb, &plane_state->view.gtt, phys_alignment);
+
if (IS_ERR(vma))
return PTR_ERR(vma);
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 6619a40aed15..7c02323e9531 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -30,26 +30,29 @@ struct intel_hdcp_gsc_message {
#define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header)
-bool intel_hdcp_gsc_cs_required(struct xe_device *xe)
+bool intel_hdcp_gsc_cs_required(struct intel_display *display)
{
- return DISPLAY_VER(xe) >= 14;
+ return DISPLAY_VER(display) >= 14;
}
-bool intel_hdcp_gsc_check_status(struct xe_device *xe)
+bool intel_hdcp_gsc_check_status(struct intel_display *display)
{
+ struct xe_device *xe = to_xe_device(display->drm);
struct xe_tile *tile = xe_device_get_root_tile(xe);
struct xe_gt *gt = tile->media_gt;
struct xe_gsc *gsc = &gt->uc.gsc;
bool ret = true;
+ unsigned int fw_ref;
- if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) {
+ if (!gsc || !xe_uc_fw_is_enabled(&gsc->fw)) {
drm_dbg_kms(&xe->drm,
"GSC Components not ready for HDCP2.x\n");
return false;
}
xe_pm_runtime_get(xe);
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref) {
drm_dbg_kms(&xe->drm,
"failed to get forcewake to check proxy status\n");
ret = false;
@@ -59,16 +62,17 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe)
if (!xe_gsc_proxy_init_done(gsc))
ret = false;
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
xe_pm_runtime_put(xe);
return ret;
}
/*This function helps allocate memory for the command that we will send to gsc cs */
-static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
+static int intel_hdcp_gsc_initialize_message(struct intel_display *display,
struct intel_hdcp_gsc_message *hdcp_message)
{
+ struct xe_device *xe = to_xe_device(display->drm);
struct xe_bo *bo = NULL;
u64 cmd_in, cmd_out;
int ret = 0;
@@ -80,7 +84,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
XE_BO_FLAG_GGTT);
if (IS_ERR(bo)) {
- drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
+ drm_err(display->drm, "Failed to allocate bo for HDCP streaming command!\n");
ret = PTR_ERR(bo);
goto out;
}
@@ -96,7 +100,7 @@ out:
return ret;
}
-static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe)
+static int intel_hdcp_gsc_hdcp2_init(struct intel_display *display)
{
struct intel_hdcp_gsc_message *hdcp_message;
int ret;
@@ -110,14 +114,14 @@ static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe)
* NOTE: No need to lock the comp mutex here as it is already
* going to be taken before this function called
*/
- ret = intel_hdcp_gsc_initialize_message(xe, hdcp_message);
+ ret = intel_hdcp_gsc_initialize_message(display, hdcp_message);
if (ret) {
- drm_err(&xe->drm, "Could not initialize hdcp_message\n");
+ drm_err(display->drm, "Could not initialize hdcp_message\n");
kfree(hdcp_message);
return ret;
}
- xe->display.hdcp.hdcp_message = hdcp_message;
+ display->hdcp.hdcp_message = hdcp_message;
return ret;
}
@@ -137,7 +141,7 @@ static const struct i915_hdcp_ops gsc_hdcp_ops = {
.close_hdcp_session = intel_hdcp_gsc_close_session,
};
-int intel_hdcp_gsc_init(struct xe_device *xe)
+int intel_hdcp_gsc_init(struct intel_display *display)
{
struct i915_hdcp_arbiter *data;
int ret;
@@ -146,33 +150,33 @@ int intel_hdcp_gsc_init(struct xe_device *xe)
if (!data)
return -ENOMEM;
- mutex_lock(&xe->display.hdcp.hdcp_mutex);
- xe->display.hdcp.arbiter = data;
- xe->display.hdcp.arbiter->hdcp_dev = xe->drm.dev;
- xe->display.hdcp.arbiter->ops = &gsc_hdcp_ops;
- ret = intel_hdcp_gsc_hdcp2_init(xe);
+ mutex_lock(&display->hdcp.hdcp_mutex);
+ display->hdcp.arbiter = data;
+ display->hdcp.arbiter->hdcp_dev = display->drm->dev;
+ display->hdcp.arbiter->ops = &gsc_hdcp_ops;
+ ret = intel_hdcp_gsc_hdcp2_init(display);
if (ret)
kfree(data);
- mutex_unlock(&xe->display.hdcp.hdcp_mutex);
+ mutex_unlock(&display->hdcp.hdcp_mutex);
return ret;
}
-void intel_hdcp_gsc_fini(struct xe_device *xe)
+void intel_hdcp_gsc_fini(struct intel_display *display)
{
struct intel_hdcp_gsc_message *hdcp_message =
- xe->display.hdcp.hdcp_message;
- struct i915_hdcp_arbiter *arb = xe->display.hdcp.arbiter;
+ display->hdcp.hdcp_message;
+ struct i915_hdcp_arbiter *arb = display->hdcp.arbiter;
if (hdcp_message) {
xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo);
kfree(hdcp_message);
- xe->display.hdcp.hdcp_message = NULL;
+ display->hdcp.hdcp_message = NULL;
}
kfree(arb);
- xe->display.hdcp.arbiter = NULL;
+ display->hdcp.arbiter = NULL;
}
static int xe_gsc_send_sync(struct xe_device *xe,
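Across this series xe_force_wake_get() no longer returns an error code; it returns a reference encoding which domains were actually woken, and that same reference is handed back to xe_force_wake_put(). A zero reference means the wake failed. A minimal sketch of the new pattern, using only calls already visible in these hunks:

	/* Sketch: the reworked forcewake acquire/release flow. */
	static int do_work_under_gt_forcewake(struct xe_gt *gt)
	{
		unsigned int fw_ref;

		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		if (!fw_ref)
			return -ETIMEDOUT;	/* nothing was woken */

		/* ... touch GT registers here ... */

		/* Release exactly what was acquired, not a fixed domain mask. */
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
		return 0;
	}

Callers that need every domain check the returned reference with xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL) instead of testing for zero, as the devcoredump and debugfs hunks below do.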
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index a50ab9eae40a..8c113463a3d5 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -170,7 +170,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
return false;
if (intel_framebuffer_init(to_intel_framebuffer(fb),
- bo, &mode_cmd)) {
+ &bo->ttm.base, &mode_cmd)) {
drm_dbg_kms(&xe->drm, "intel fb init failed\n");
goto err_bo;
}
@@ -248,7 +248,7 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
* the lookup of sysmem scratch pages.
*/
plane->check_plane(crtc_state, plane_state);
- plane->async_flip(plane, crtc_state, plane_state, true);
+ plane->async_flip(NULL, plane, crtc_state, plane_state, true);
return;
nofb:
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 81b71903675e..7c78496e6213 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -186,6 +186,7 @@
#define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10)
#define IECPUNIT_CLKGATE_DIS REG_BIT(22)
+#define RAMDFTUNIT_CLKGATE_DIS REG_BIT(9)
#define VDBOX_CGCTL3F18(base) XE_REG((base) + 0x3f18)
#define ALNUNIT_CLKGATE_DIS REG_BIT(13)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index bd604b9f08e4..0c9e4b2fafab 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -286,6 +286,9 @@
#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
#define LTCDD_CLKGATE_DIS REG_BIT(10)
+#define UNSLCGCTL9454 XE_REG(0x9454)
+#define LSCFE_CLKGATE_DIS REG_BIT(4)
+
#define XEHP_SLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x94d4)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
#define L3_CLKGATE_DIS REG_BIT(16)
@@ -344,6 +347,14 @@
#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0)
#define FORCEWAKE_RENDER XE_REG(0xa278)
+
+#define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0)
+#define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4)
+#define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3)
+#define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2)
+#define RENDER_AWAKE_STATUS REG_BIT(1)
+#define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0)
+
#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4)
#define FORCEWAKE_GSC XE_REG(0xa618)
@@ -556,62 +567,6 @@
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
-/*
- * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
- * On newer platforms, VFs are using memory-based interrupts instead.
- * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
- */
-
-#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
-#define INTR_GSC REG_BIT(31)
-#define INTR_GUC REG_BIT(25)
-#define INTR_MGUC REG_BIT(24)
-#define INTR_BCS8 REG_BIT(23)
-#define INTR_BCS(x) REG_BIT(15 - (x))
-#define INTR_CCS(x) REG_BIT(4 + (x))
-#define INTR_RCS0 REG_BIT(0)
-#define INTR_VECS(x) REG_BIT(31 - (x))
-#define INTR_VCS(x) REG_BIT(x)
-
-#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
-#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
-#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
-#define ENGINE1_MASK REG_GENMASK(31, 16)
-#define ENGINE0_MASK REG_GENMASK(15, 0)
-#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
-#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
-#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
-
-#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
-#define INTR_DATA_VALID REG_BIT(31)
-#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
-#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
-#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
-#define OTHER_GUC_INSTANCE 0
-#define OTHER_GSC_HECI2_INSTANCE 3
-#define OTHER_GSC_INSTANCE 6
-
-#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
-#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
-#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
-#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
-#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
-#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
-#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
-#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
-#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
-#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
-#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
-#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
-#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
-#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114)
-#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118)
-#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
-#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
-#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
-#define GSC_ER_COMPLETE REG_BIT(5)
-#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
-#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
-#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+#define SFC_DONE(n) XE_REG(0x1cc000 + (n) * 0x1000)
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index a5fd14307f94..2118f7dec287 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -84,6 +84,8 @@
#define HUC_LOADING_AGENT_GUC REG_BIT(1)
#define GUC_WOPCM_OFFSET_VALID REG_BIT(0)
#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4)
+#define GUC_PMTIMESTAMP_LO XE_REG(0xc3e8)
+#define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec)
#define GUC_SEND_INTERRUPT XE_REG(0xc4c8)
#define GUC_SEND_TRIGGER REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
new file mode 100644
index 000000000000..1776b3f78ccb
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_IRQ_REGS_H_
+#define _XE_IRQ_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define PCU_IRQ_OFFSET 0x444e0
+#define GU_MISC_IRQ_OFFSET 0x444f0
+#define GU_MISC_GSE REG_BIT(27)
+
+#define DG1_MSTR_TILE_INTR XE_REG(0x190008)
+#define DG1_MSTR_IRQ REG_BIT(31)
+#define DG1_MSTR_TILE(t) REG_BIT(t)
+
+#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
+#define MASTER_IRQ REG_BIT(31)
+#define GU_MISC_IRQ REG_BIT(29)
+#define DISPLAY_IRQ REG_BIT(16)
+#define GT_DW_IRQ(x) REG_BIT(x)
+
+/*
+ * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
+ * On newer platforms, VFs are using memory-based interrupts instead.
+ * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
+ */
+
+#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
+#define INTR_GSC REG_BIT(31)
+#define INTR_GUC REG_BIT(25)
+#define INTR_MGUC REG_BIT(24)
+#define INTR_BCS8 REG_BIT(23)
+#define INTR_BCS(x) REG_BIT(15 - (x))
+#define INTR_CCS(x) REG_BIT(4 + (x))
+#define INTR_RCS0 REG_BIT(0)
+#define INTR_VECS(x) REG_BIT(31 - (x))
+#define INTR_VCS(x) REG_BIT(x)
+
+#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
+#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
+#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
+#define ENGINE1_MASK REG_GENMASK(31, 16)
+#define ENGINE0_MASK REG_GENMASK(15, 0)
+#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
+#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
+
+#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
+#define INTR_DATA_VALID REG_BIT(31)
+#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
+#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
+#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
+#define OTHER_GUC_INSTANCE 0
+#define OTHER_GSC_HECI2_INSTANCE 3
+#define OTHER_GSC_INSTANCE 6
+
+#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
+#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
+#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
+#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
+#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
+#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
+#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
+#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
+#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
+#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
+#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
+#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
+#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
+#define GSC_ER_COMPLETE REG_BIT(5)
+#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
+#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
+#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+
+#endif
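The INTR_ENGINE_* helpers above are REG_FIELD_GET() wrappers over the identity dword reported through INTR_IDENTITY_REG(). A minimal sketch of how such a dword would be decoded, assuming a raw value has already been read and acknowledged elsewhere:

	/* Sketch: decoding one interrupt identity dword with the macros above. */
	if (ident & INTR_DATA_VALID) {
		u32 class    = INTR_ENGINE_CLASS(ident);	/* engine class */
		u32 instance = INTR_ENGINE_INSTANCE(ident);	/* engine instance */
		u32 intr     = INTR_ENGINE_INTR(ident);		/* interrupt bits */

		/*
		 * Route (class, instance) to the matching hw engine, then
		 * handle bits such as GT_RENDER_USER_INTERRUPT in intr.
		 */
	}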
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 23f7dc5bbe99..51fd40ffafcb 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -128,7 +128,7 @@ struct xe_reg_mcr {
* options.
*/
#define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \
- .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
+ .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
})
static inline bool xe_reg_is_valid(struct xe_reg r)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index dfa869f0dddd..3293172b0128 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -11,10 +11,6 @@
#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12)
#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0)
-#define PCU_IRQ_OFFSET 0x444e0
-#define GU_MISC_IRQ_OFFSET 0x444f0
-#define GU_MISC_GSE REG_BIT(27)
-
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
@@ -57,16 +53,6 @@
#define MTL_MPE_FREQUENCY XE_REG(0x13802c)
#define MTL_RPE_MASK REG_GENMASK(8, 0)
-#define DG1_MSTR_TILE_INTR XE_REG(0x190008)
-#define DG1_MSTR_IRQ REG_BIT(31)
-#define DG1_MSTR_TILE(t) REG_BIT(t)
-
-#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
-#define MASTER_IRQ REG_BIT(31)
-#define GU_MISC_IRQ REG_BIT(29)
-#define DISPLAY_IRQ REG_BIT(16)
-#define GT_DW_IRQ(x) REG_BIT(x)
-
#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
#define VF_CAP REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 8dac069483e8..3e0ae40ebbd2 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,13 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
+#include <linux/iosys-map.h>
+#include <linux/math64.h>
+#include <linux/prandom.h>
+#include <linux/swap.h>
+
+#include <uapi/linux/sysinfo.h>
+
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
@@ -358,9 +365,242 @@ static void xe_bo_evict_kunit(struct kunit *test)
evict_test_run_device(xe);
}
+struct xe_bo_link {
+ struct list_head link;
+ struct xe_bo *bo;
+ u32 val;
+};
+
+#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
+
+static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t __maybe_unused i;
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ iosys_map_wr(&map, i, u32, val);
+ if (i == 0)
+ link->val = val;
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+ return 0;
+}
+
+static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
+ unsigned int bo_nr, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t i;
+ bool failed = false;
+
+ if (ret) {
+ KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
+ return true;
+ }
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ if (iosys_map_rd(&map, i, u32) != val) {
+ KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
+ bo_nr, (unsigned long long)i);
+ kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
+ (unsigned int)iosys_map_rd(&map, i, u32), val);
+ if (i == 0 && val != link->val)
+ kunit_info(test, "Looks like PRNG is out of sync.\n");
+ failed = true;
+ break;
+ }
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+
+ return failed;
+}
+
+/*
+ * Try to create system bos corresponding to twice the amount
+ * of available system memory to test shrinker functionality.
+ * If no swap space is available to accommodate the
+ * memory overcommit, mark bos purgeable.
+ */
+static int shrink_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = kunit_get_current_test();
+ LIST_HEAD(bos);
+ struct xe_bo_link *link, *next;
+ struct sysinfo si;
+ u64 ram, ram_and_swap, purgeable = 0, alloced, to_alloc, limit;
+ unsigned int interrupted = 0, successful = 0, count = 0;
+ struct rnd_state prng;
+ u64 rand_seed;
+ bool failed = false;
+
+ rand_seed = get_random_u64();
+ prandom_seed_state(&prng, rand_seed);
+ kunit_info(test, "Random seed is 0x%016llx.\n",
+ (unsigned long long)rand_seed);
+
+ /* Skip if execution time is expected to be too long. */
+
+ limit = SZ_32G;
+ /* IGFX with flat CCS needs to copy when swapping / shrinking */
+ if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+ limit = SZ_16G;
+
+ si_meminfo(&si);
+ ram = (size_t)si.freeram * si.mem_unit;
+ if (ram > limit) {
+ kunit_skip(test, "Too long expected execution time.\n");
+ return 0;
+ }
+ to_alloc = ram * 2;
+
+ ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE;
+ if (to_alloc > ram_and_swap)
+ purgeable = to_alloc - ram_and_swap;
+ purgeable += div64_u64(purgeable, 5);
+
+ kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
+ (unsigned long)ram);
+ for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) {
+ struct xe_bo *bo;
+ unsigned int mem_type;
+ struct xe_ttm_tt *xe_tt;
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ KUNIT_FAIL(test, "Unexpected link allocation failure\n");
+ failed = true;
+ break;
+ }
+
+ INIT_LIST_HEAD(&link->link);
+
+ /* We can create bos using WC caching here. But it is slower. */
+ bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ DRM_XE_GEM_CPU_CACHING_WB,
+ XE_BO_FLAG_SYSTEM);
+ if (IS_ERR(bo)) {
+ if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
+ bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
+ KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
+ kfree(link);
+ failed = true;
+ break;
+ }
+ xe_bo_lock(bo, false);
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+
+ /*
+ * Allocate purgeable bos first, because if we do it the
+ * other way around, they may not be subject to swapping...
+ */
+ if (alloced < purgeable) {
+ xe_tt->purgeable = true;
+ bo->ttm.priority = 0;
+ } else {
+ int ret = shrink_test_fill_random(bo, &prng, link);
+
+ if (ret) {
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
+ ERR_PTR(ret));
+ kfree(link);
+ failed = true;
+ break;
+ }
+ }
+
+ mem_type = bo->ttm.resource->mem_type;
+ xe_bo_unlock(bo);
+ link->bo = bo;
+ list_add_tail(&link->link, &bos);
+
+ if (mem_type != XE_PL_TT) {
+ KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
+ bo->ttm.resource->mem_type);
+ failed = true;
+ }
+ cond_resched();
+ if (signal_pending(current))
+ break;
+ }
+
+ /*
+ * Read back and destroy bos. Reset the pseudo-random seed to get an
+ * identical pseudo-random number sequence for readback.
+ */
+ prandom_seed_state(&prng, rand_seed);
+ list_for_each_entry_safe(link, next, &bos, link) {
+ static struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct xe_bo *bo = link->bo;
+ struct xe_ttm_tt *xe_tt;
+ int ret;
+
+ count++;
+ if (!signal_pending(current) && !failed) {
+ bool purgeable, intr = false;
+
+ xe_bo_lock(bo, false);
+
+ /* xe_tt->purgeable is cleared on validate. */
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+ purgeable = xe_tt->purgeable;
+ do {
+ ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
+ if (ret == -EINTR)
+ intr = true;
+ } while (ret == -EINTR && !signal_pending(current));
+
+ if (!ret && !purgeable)
+ failed = shrink_test_verify(test, bo, count, &prng, link);
+
+ xe_bo_unlock(bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Validation failed: %pe\n",
+ ERR_PTR(ret));
+ failed = true;
+ } else if (intr) {
+ interrupted++;
+ } else {
+ successful++;
+ }
+ }
+ xe_bo_put(link->bo);
+ list_del(&link->link);
+ kfree(link);
+ }
+ kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
+ interrupted, successful);
+
+ return 0;
+}
+
+static void xe_bo_shrink_kunit(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ shrink_test_run_device(xe);
+}
+
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
+ {.speed = KUNIT_SPEED_SLOW}),
{}
};
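The shrink test's content check hinges on the PRNG stream being replayable: seeding with the same value regenerates exactly the values that were written, so nothing has to be stored besides the seed. A minimal sketch of that fill-then-verify idea, using only the prandom helpers the test already includes:

	/* Sketch: fill-then-verify with a replayable pseudo-random stream. */
	static bool prng_roundtrip_ok(u32 *buf, size_t dwords, u64 seed)
	{
		struct rnd_state prng;
		size_t i;

		prandom_seed_state(&prng, seed);
		for (i = 0; i < dwords; i++)
			buf[i] = prandom_u32_state(&prng);

		/* ... contents may get swapped out and back in here ... */

		prandom_seed_state(&prng, seed);	/* replay the same stream */
		for (i = 0; i < dwords; i++)
			if (buf[i] != prandom_u32_state(&prng))
				return false;

		return true;
	}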
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 79be73b4a02b..6f9b7a266b41 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -43,19 +43,18 @@ static void read_l3cc_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
- unsigned int i;
+ unsigned int fw_ref, i;
u32 reg_val;
- u32 ret;
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (!(i & 1)) {
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i >> 1));
mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
} else {
@@ -72,7 +71,7 @@ static void read_l3cc_table(struct xe_gt *gt,
KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
"l3cc idx=%u has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void read_mocs_table(struct xe_gt *gt,
@@ -80,21 +79,20 @@ static void read_mocs_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
- unsigned int i;
+ unsigned int fw_ref, i;
u32 reg_val;
- u32 ret;
KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
"Unused entries index should have been defined\n");
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
mocs_expected = get_entry_control(info, i);
mocs = reg_val;
@@ -106,7 +104,7 @@ static void read_mocs_table(struct xe_gt *gt,
"mocs reg 0x%x has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static int mocs_kernel_test_run_device(struct xe_device *xe)
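Besides the forcewake change, these hunks reflect the MMIO rework: register accessors now take a struct xe_mmio rather than a GT, so per-GT registers go through &gt->mmio while SoC-level registers use the root tile's view. A minimal sketch of the two forms, limited to registers already referenced in this diff:

	/* Sketch: per-GT vs. root-tile MMIO access after the xe_mmio rework. */
	u32 mocs = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(0));
	u32 cntl = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL);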
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index e22bbf57fca7..04d6b95c6d87 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -10,7 +10,7 @@
#include <drm/drm_print.h>
-#include "xe_device_types.h"
+#include "xe_gt_types.h"
#include "xe_step.h"
/**
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 2a093540354e..ae6b337cdc54 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -283,6 +283,8 @@ struct xe_ttm_tt {
struct device *dev;
struct sg_table sgt;
struct sg_table *sg;
+ /** @purgeable: Whether the content of the pages of @ttm is purgeable. */
+ bool purgeable;
};
static int xe_tt_map_sg(struct ttm_tt *tt)
@@ -468,7 +470,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += vram->io_start;
mem->bus.is_iomem = true;
-#if !defined(CONFIG_X86)
+#if !IS_ENABLED(CONFIG_X86)
mem->bus.caching = ttm_write_combined;
#endif
return 0;
@@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
if (xe_rpm_reclaim_safe(xe)) {
/*
* We might be called through swapout in the validation path of
- * another TTM device, so unconditionally acquire rpm here.
+ * another TTM device, so acquire rpm here.
*/
xe_pm_runtime_get(xe);
} else {
@@ -901,7 +903,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
}
}
- ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ ret = ttm_bo_populate(&bo->ttm, &ctx);
if (ret)
goto err_res_free;
@@ -961,7 +963,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
return ret;
- ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ ret = ttm_bo_populate(&bo->ttm, &ctx);
if (ret)
goto err_res_free;
@@ -1089,6 +1091,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
}
}
+static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
+{
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+ if (ttm_bo->ttm) {
+ struct ttm_placement place = {};
+ int ret = ttm_bo_validate(ttm_bo, &place, ctx);
+
+ drm_WARN_ON(&xe->drm, ret);
+ }
+}
+
+static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false
+ };
+
+ if (ttm_bo->ttm) {
+ struct xe_ttm_tt *xe_tt =
+ container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->purgeable)
+ xe_ttm_bo_purge(ttm_bo, &ctx);
+ }
+}
+
const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
@@ -1101,6 +1130,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.release_notify = xe_ttm_bo_release_notify,
.eviction_valuable = ttm_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+ .swap_notify = xe_ttm_bo_swap_notify,
};
static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
@@ -1431,7 +1461,8 @@ static struct xe_bo *
__xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+ u16 cpu_caching, enum ttm_bo_type type, u32 flags,
+ u64 alignment)
{
struct xe_bo *bo = NULL;
int err;
@@ -1460,6 +1491,8 @@ __xe_bo_create_locked(struct xe_device *xe,
if (IS_ERR(bo))
return bo;
+ bo->min_align = alignment;
+
/*
* Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
* to ensure the shared resv doesn't disappear under the bo, the bo
@@ -1500,16 +1533,18 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags, u64 alignment)
{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags, alignment);
}
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags)
{
- return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
+ flags, 0);
}
struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
@@ -1519,7 +1554,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
{
struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
cpu_caching, ttm_bo_type_device,
- flags | XE_BO_FLAG_USER);
+ flags | XE_BO_FLAG_USER, 0);
if (!IS_ERR(bo))
xe_bo_unlock_vm_held(bo);
@@ -1543,6 +1578,17 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
size_t size, u64 offset,
enum ttm_bo_type type, u32 flags)
{
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
+ type, flags, 0);
+}
+
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment)
+{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
@@ -1553,7 +1599,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
+ alignment);
if (IS_ERR(bo))
return bo;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6e4be52306df..7fa44a0138b0 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -77,7 +77,7 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags);
+ enum ttm_bo_type type, u32 flags, u64 alignment);
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags);
@@ -94,6 +94,12 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size, u64 offset,
enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment);
struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size,
enum ttm_bo_type type, u32 flags);
@@ -312,8 +318,6 @@ static inline unsigned int xe_sg_segment_size(struct device *dev)
return round_down(max / 2, PAGE_SIZE);
}
-#define i915_gem_object_flush_if_display(obj) ((void)(obj))
-
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
/**
* xe_bo_is_mem_type - Whether the bo currently resides in the given
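The newly exported xe_bo_create_pin_map_at_aligned() threads a minimum physical alignment down into placement (recorded in bo->min_align); the existing wrappers keep passing 0. A hedged usage sketch, reusing the flags and the ~0ull "place anywhere" offset from the DPT path earlier in this series; the SZ_64K alignment is purely illustrative:

	/* Sketch: pinned, GGTT-mapped allocation with an explicit alignment. */
	struct xe_bo *bo;

	bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL,
					     size, ~0ull,	/* no fixed offset */
					     ttm_bo_type_kernel,
					     XE_BO_FLAG_VRAM_IF_DGFX(tile) |
					     XE_BO_FLAG_GGTT,
					     SZ_64K);		/* illustrative alignment */
	if (IS_ERR(bo))
		return PTR_ERR(bo);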
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 2ed558ac2264..13c6d8a69e91 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -76,9 +76,11 @@ struct xe_bo {
/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
struct list_head vram_userfault_link;
-};
-#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
-#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)
+ /** @min_align: minimum alignment needed for this BO if different
+ * from default
+ */
+ u64 min_align;
+};
#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index fe4319eb13fd..492b4877433f 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -90,13 +90,32 @@ static int forcewake_open(struct inode *inode, struct file *file)
{
struct xe_device *xe = inode->i_private;
struct xe_gt *gt;
- u8 id;
+ u8 id, last_gt;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
- for_each_gt(gt, xe, id)
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ goto err_fw_get;
+ }
return 0;
+
+err_fw_get:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ else if (id == last_gt)
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ else
+ break;
+ }
+
+ xe_pm_runtime_put(xe);
+ return -ETIMEDOUT;
}
static int forcewake_release(struct inode *inode, struct file *file)
@@ -106,7 +125,7 @@ static int forcewake_release(struct inode *inode, struct file *file)
u8 id;
for_each_gt(gt, xe, id)
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(xe);
return 0;
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index bdb76e834e4c..d2679c5d976b 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -6,6 +6,7 @@
#include "xe_devcoredump.h"
#include "xe_devcoredump_types.h"
+#include <linux/ascii85.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
@@ -16,9 +17,12 @@
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
+#include "xe_guc_log.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
+#include "xe_module.h"
#include "xe_sched_job.h"
#include "xe_vm.h"
@@ -85,9 +89,9 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
p = drm_coredump_printer(&iter);
- drm_printf(&p, "**** Xe Device Coredump ****\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
- drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_puts(&p, "**** Xe Device Coredump ****\n");
+ drm_puts(&p, "kernel: " UTS_RELEASE "\n");
+ drm_puts(&p, "module: " KBUILD_MODNAME "\n");
ts = ktime_to_timespec64(ss->snapshot_time);
drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
@@ -96,20 +100,27 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_printf(&p, "Process: %s\n", ss->process_name);
xe_device_snapshot_print(xe, &p);
- drm_printf(&p, "\n**** GuC CT ****\n");
- xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
- xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
+ drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
+ drm_printf(&p, "\tTile: %d\n", ss->gt->tile->id);
- drm_printf(&p, "\n**** Job ****\n");
- xe_sched_job_snapshot_print(coredump->snapshot.job, &p);
+ drm_puts(&p, "\n**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(ss->guc.log, &p);
+ drm_puts(&p, "\n**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(ss->guc.ct, &p);
- drm_printf(&p, "\n**** HW Engines ****\n");
+ drm_puts(&p, "\n**** Contexts ****\n");
+ xe_guc_exec_queue_snapshot_print(ss->ge, &p);
+
+ drm_puts(&p, "\n**** Job ****\n");
+ xe_sched_job_snapshot_print(ss->job, &p);
+
+ drm_puts(&p, "\n**** HW Engines ****\n");
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
- if (coredump->snapshot.hwe[i])
- xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
- &p);
- drm_printf(&p, "\n**** VM state ****\n");
- xe_vm_snapshot_print(coredump->snapshot.vm, &p);
+ if (ss->hwe[i])
+ xe_engine_snapshot_print(ss->hwe[i], &p);
+
+ drm_puts(&p, "\n**** VM state ****\n");
+ xe_vm_snapshot_print(ss->vm, &p);
return count - iter.remain;
}
@@ -118,8 +129,14 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
{
int i;
- xe_guc_ct_snapshot_free(ss->ct);
- ss->ct = NULL;
+ xe_guc_log_snapshot_free(ss->guc.log);
+ ss->guc.log = NULL;
+
+ xe_guc_ct_snapshot_free(ss->guc.ct);
+ ss->guc.ct = NULL;
+
+ xe_guc_capture_put_matched_nodes(&ss->gt->uc.guc);
+ ss->matched_node = NULL;
xe_guc_exec_queue_snapshot_free(ss->ge);
ss->ge = NULL;
@@ -141,13 +158,15 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
+ unsigned int fw_ref;
/* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
xe_vm_snapshot_capture_delayed(ss->vm);
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
/* Calculate devcoredump size */
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
@@ -204,6 +223,7 @@ static void xe_devcoredump_free(void *data)
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
coredump->captured = false;
+ coredump->job = NULL;
drm_info(&coredump_to_xe(coredump)->drm,
"Xe device coredump has been deleted.\n");
}
@@ -214,14 +234,13 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_hw_engine *hwe;
- enum xe_hw_engine_id id;
u32 adj_logical_mask = q->logical_mask;
u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
- int i;
+ unsigned int fw_ref;
bool cookie;
+ int i;
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
@@ -231,6 +250,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
strscpy(ss->process_name, process_name);
ss->gt = q->gt;
+ coredump->job = job;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
@@ -244,26 +264,19 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
}
/* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
- xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
- coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
- coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
- coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
- coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
+ ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
+ ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
+ ss->ge = xe_guc_exec_queue_snapshot_capture(q);
+ ss->job = xe_sched_job_snapshot_capture(job);
+ ss->vm = xe_vm_snapshot_capture(q->vm);
- for_each_hw_engine(hwe, q->gt, id) {
- if (hwe->class != q->hwe->class ||
- !(BIT(hwe->logical_instance) & adj_logical_mask)) {
- coredump->snapshot.hwe[id] = NULL;
- continue;
- }
- coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
- }
+ xe_engine_snapshot_capture_for_job(job);
queue_work(system_unbound_wq, &ss->work);
- xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
dma_fence_end_signalling(cookie);
}
@@ -310,3 +323,89 @@ int xe_devcoredump_init(struct xe_device *xe)
}
#endif
+
+/**
+ * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
+ *
+ * The output is split into multiple lines because some print targets, e.g. dmesg,
+ * cannot handle arbitrarily long lines. Note also that printing to dmesg
+ * piecemeal is not possible: each separate call to drm_puts() has a
+ * line-feed automatically added! Therefore, the entire output line must be
+ * constructed in a local buffer first, then printed in one atomic output call.
+ *
+ * There is also a scheduler yield call to prevent the 'task has been stuck for
+ * 120s' kernel hang check feature from firing when printing to a slow target
+ * such as dmesg over a serial port.
+ *
+ * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
+ *
+ * @p: the printer object to output to
+ * @prefix: optional prefix to add to output string
+ * @blob: the Binary Large OBject to dump out
+ * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
+ * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
+ */
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+ const void *blob, size_t offset, size_t size)
+{
+ const u32 *blob32 = (const u32 *)blob;
+ char buff[ASCII85_BUFSZ], *line_buff;
+ size_t line_pos = 0;
+
+#define DMESG_MAX_LINE_LEN 800
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
+
+ if (size & 3)
+ drm_printf(p, "Size not word aligned: %zu", size);
+ if (offset & 3)
+ drm_printf(p, "Offset not word aligned: %zu", size);
+
+ line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
+ if (IS_ERR_OR_NULL(line_buff)) {
+ drm_printf(p, "Failed to allocate line buffer: %pe", line_buff);
+ return;
+ }
+
+ blob32 += offset / sizeof(*blob32);
+ size /= sizeof(*blob32);
+
+ if (prefix) {
+ strscpy(line_buff, prefix, DMESG_MAX_LINE_LEN - MIN_SPACE - 2);
+ line_pos = strlen(line_buff);
+
+ line_buff[line_pos++] = ':';
+ line_buff[line_pos++] = ' ';
+ }
+
+ while (size--) {
+ u32 val = *(blob32++);
+
+ strscpy(line_buff + line_pos, ascii85_encode(val, buff),
+ DMESG_MAX_LINE_LEN - line_pos);
+ line_pos += strlen(line_buff + line_pos);
+
+ if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
+ line_buff[line_pos++] = '\n';
+ line_buff[line_pos++] = 0;
+
+ drm_puts(p, line_buff);
+
+ line_pos = 0;
+
+ /* Prevent 'stuck thread' time out errors */
+ cond_resched();
+ }
+ }
+
+ if (line_pos) {
+ line_buff[line_pos++] = '\n';
+ line_buff[line_pos++] = 0;
+
+ drm_puts(p, line_buff);
+ }
+
+ kfree(line_buff);
+
+#undef MIN_SPACE
+#undef DMESG_MAX_LINE_LEN
+}
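xe_print_blob_ascii85() is meant to be called from snapshot printers with whatever drm_printer the coredump read path hands them, which may ultimately be dmesg; that is why output is assembled one bounded line at a time and cond_resched() is sprinkled in. A hedged usage sketch; the wrapper name and buffer are illustrative, and both offset and size must be multiples of sizeof(u32):

	/* Sketch: dumping a captured binary buffer from a snapshot printer. */
	static void my_snapshot_print_fw_blob(struct drm_printer *p,
					      const void *blob, size_t size)
	{
		/* Optional prefix goes on the first encoded line; start at offset 0. */
		xe_print_blob_ascii85(p, "FW buffer", blob, 0, size);
	}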
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index e2fa65ce0932..a4eebc285fc8 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -6,6 +6,9 @@
#ifndef _XE_DEVCOREDUMP_H_
#define _XE_DEVCOREDUMP_H_
+#include <linux/types.h>
+
+struct drm_printer;
struct xe_device;
struct xe_sched_job;
@@ -23,4 +26,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe)
}
#endif
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+ const void *blob, size_t offset, size_t size);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 440d05d77a5a..3703ddea1252 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -34,16 +34,27 @@ struct xe_devcoredump_snapshot {
/** @work: Workqueue for deferred capture outside of signaling context */
struct work_struct work;
- /* GuC snapshots */
- /** @ct: GuC CT snapshot */
- struct xe_guc_ct_snapshot *ct;
- /** @ge: Guc Engine snapshot */
+ /** @guc: GuC snapshots */
+ struct {
+ /** @guc.ct: GuC CT snapshot */
+ struct xe_guc_ct_snapshot *ct;
+ /** @guc.log: GuC log snapshot */
+ struct xe_guc_log_snapshot *log;
+ } guc;
+
+ /** @ge: GuC Submission Engine snapshot */
struct xe_guc_submit_exec_queue_snapshot *ge;
/** @hwe: HW Engine snapshot array */
struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
/** @job: Snapshot of job state */
struct xe_sched_job_snapshot *job;
+ /**
+ * @matched_node: The matched capture node for the timed-out job.
+ * This single-node tracker works because devcoredump will always only
+ * produce one hw-engine capture per devcoredump event.
+ */
+ struct __guc_capture_parsed_output *matched_node;
/** @vm: Snapshot of VM state */
struct xe_vm_snapshot *vm;
@@ -69,6 +80,8 @@ struct xe_devcoredump {
bool captured;
/** @snapshot: Snapshot is captured at time of the first crash */
struct xe_devcoredump_snapshot snapshot;
+ /** @job: Pointer to the faulting job */
+ struct xe_sched_job *job;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index a1987b554a8d..0e2dd691bdae 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -5,10 +5,11 @@
#include "xe_device.h"
+#include <linux/aperture.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <linux/units.h>
-#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
@@ -301,7 +302,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe_display_driver_set_hooks(&driver);
- err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+ err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
if (err)
return ERR_PTR(err);
@@ -373,6 +374,12 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
err:
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
/*
* The driver-initiated FLR is the highest level of reset that we can trigger
@@ -387,17 +394,12 @@ err:
* if/when a new instance of i915 is bound to the device it will do a full
* re-init anyway.
*/
-static void xe_driver_flr(struct xe_device *xe)
+static void __xe_driver_flr(struct xe_device *xe)
{
const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
int ret;
- if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
- drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
- return;
- }
-
drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
/*
@@ -409,25 +411,25 @@ static void xe_driver_flr(struct xe_device *xe)
* is still pending (unless the HW is totally dead), but better to be
* safe in case something unexpected happens
*/
- ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
return;
}
- xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
/* Trigger the actual Driver-FLR */
- xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
/* Wait for hardware teardown to complete */
- ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
return;
}
/* Wait for hardware/firmware re-init to complete */
- ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
@@ -435,7 +437,17 @@ static void xe_driver_flr(struct xe_device *xe)
}
/* Clear sticky completion status */
- xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
}
static void xe_driver_flr_fini(void *arg)
@@ -478,16 +490,15 @@ mask_err:
return err;
}
-static bool verify_lmem_ready(struct xe_gt *gt)
+static bool verify_lmem_ready(struct xe_device *xe)
{
- u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
return !!val;
}
static int wait_for_lmem_ready(struct xe_device *xe)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
unsigned long timeout, start;
if (!IS_DGFX(xe))
@@ -496,7 +507,7 @@ static int wait_for_lmem_ready(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return 0;
- if (verify_lmem_ready(gt))
+ if (verify_lmem_ready(xe))
return 0;
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
@@ -525,13 +536,14 @@ static int wait_for_lmem_ready(struct xe_device *xe)
msleep(20);
- } while (!verify_lmem_ready(gt));
+ } while (!verify_lmem_ready(xe));
drm_dbg(&xe->drm, "lmem ready after %ums",
jiffies_to_msecs(jiffies - start));
return 0;
}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
static void update_device_info(struct xe_device *xe)
{
@@ -579,19 +591,21 @@ int xe_device_probe_early(struct xe_device *xe)
return 0;
}
-static int xe_device_set_has_flat_ccs(struct xe_device *xe)
+static int probe_has_flat_ccs(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ unsigned int fw_ref;
u32 reg;
- int err;
+ /* Always enabled/disabled, no runtime check to do */
if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
return 0;
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ gt = xe_root_mmio_gt(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
@@ -600,7 +614,8 @@ static int xe_device_set_has_flat_ccs(struct xe_device *xe)
drm_dbg(&xe->drm,
"Flat CCS has been disabled in bios, May lead to performance impact");
- return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
int xe_device_probe(struct xe_device *xe)
@@ -636,6 +651,13 @@ int xe_device_probe(struct xe_device *xe)
err = xe_gt_init_early(gt);
if (err)
return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
}
for_each_tile(tile, xe, id) {
@@ -651,11 +673,9 @@ int xe_device_probe(struct xe_device *xe)
err = xe_ggtt_init_early(tile->mem.ggtt);
if (err)
return err;
- if (IS_SRIOV_VF(xe)) {
- err = xe_memirq_init(&tile->sriov.vf.memirq);
- if (err)
- return err;
- }
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
}
for_each_gt(gt, xe, id) {
@@ -679,7 +699,7 @@ int xe_device_probe(struct xe_device *xe)
if (err)
goto err;
- err = xe_device_set_has_flat_ccs(xe);
+ err = probe_has_flat_ccs(xe);
if (err)
goto err;
@@ -789,6 +809,24 @@ void xe_device_remove(struct xe_device *xe)
void xe_device_shutdown(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
}
/**
@@ -802,11 +840,9 @@ void xe_device_shutdown(struct xe_device *xe)
*/
void xe_device_wmb(struct xe_device *xe)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
-
wmb();
if (IS_DGFX(xe))
- xe_mmio_write32(gt, VF_CAP_REG, 0);
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
/**
@@ -830,6 +866,7 @@ void xe_device_wmb(struct xe_device *xe)
void xe_device_td_flush(struct xe_device *xe)
{
struct xe_gt *gt;
+ unsigned int fw_ref;
u8 id;
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
@@ -844,10 +881,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (xe_gt_is_media_type(gt))
continue;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
- xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -855,36 +893,36 @@ void xe_device_td_flush(struct xe_device *xe)
* scenario on current platforms if all cache entries are
* transient and need to be flushed..
*/
- if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
150, NULL, false))
xe_gt_err_once(gt, "TD flush timeout\n");
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
}
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
- int err;
+ unsigned int fw_ref;
gt = xe_root_mmio_gt(xe);
if (!XE_WA(gt, 16023588340))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
- if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
@@ -919,6 +957,7 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
for_each_gt(gt, xe, id) {
drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tTile: %u\n", gt->tile->id);
drm_printf(p, "\tType: %s\n",
gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
drm_printf(p, "\tIP ver: %u.%u.%u\n",
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 34620ef855c0..f1fbfe916867 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -9,6 +9,8 @@
#include <drm/drm_util.h>
#include "xe_device_types.h"
+#include "xe_gt_types.h"
+#include "xe_sriov.h"
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
@@ -138,7 +140,7 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
{
- return &gt->mmio.fw;
+ return &gt->pm.fw;
}
void xe_device_assert_mem_access(struct xe_device *xe);
@@ -153,11 +155,22 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
return xe->info.has_sriov;
}
+static inline bool xe_device_has_msix(struct xe_device *xe)
+{
+ /* TODO: change this when MSI-X support is fully integrated */
+ return false;
+}
+
static inline bool xe_device_has_memirq(struct xe_device *xe)
{
return GRAPHICS_VERx100(xe) >= 1250;
}
+static inline bool xe_device_uses_memirq(struct xe_device *xe)
+{
+ return xe_device_has_memirq(xe) && (IS_SRIOV_VF(xe) || xe_device_has_msix(xe));
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 687f3a9039bb..b9ea455d6f59 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -14,7 +14,6 @@
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
-#include "xe_gt_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa.h"
@@ -108,6 +107,45 @@ struct xe_mem_region {
};
/**
+ * struct xe_mmio - register mmio structure
+ *
+ * Represents an MMIO region that the CPU may use to access registers. A
+ * region may share its IO map with other regions (e.g., all GTs within a
+ * tile share the same map with their parent tile, but represent different
+ * subregions of the overall IO space).
+ */
+struct xe_mmio {
+ /** @tile: Backpointer to tile, used for tracing */
+ struct xe_tile *tile;
+
+ /** @regs: Map used to access registers. */
+ void __iomem *regs;
+
+ /**
+ * @sriov_vf_gt: Backpointer to GT.
+ *
+ * This pointer is only set for GT MMIO regions and only when running
+ * as an SRIOV VF.
+ */
+ struct xe_gt *sriov_vf_gt;
+
+ /**
+ * @regs_size: Length of the register region within the map.
+ *
+ * The size of the iomap set in *regs is generally larger than the
+ * register mmio space since it includes unused regions and/or
+ * non-register regions such as the GGTT PTEs.
+ */
+ size_t regs_size;
+
+ /** @adj_limit: adjust MMIO address if address is below this value */
+ u32 adj_limit;
+
+ /** @adj_offset: offset to add to MMIO address when adjusting */
+ u32 adj_offset;
+};
+
+/**
* struct xe_tile - hardware tile structure
*
* From a driver perspective, a "tile" is effectively a complete GPU, containing
@@ -148,26 +186,14 @@ struct xe_tile {
* * 4MB-8MB: reserved
* * 8MB-16MB: global GTT
*/
- struct {
- /** @mmio.size: size of tile's MMIO space */
- size_t size;
-
- /** @mmio.regs: pointer to tile's MMIO space (starting with registers) */
- void __iomem *regs;
- } mmio;
+ struct xe_mmio mmio;
/**
* @mmio_ext: MMIO-extension info for a tile.
*
* Each tile has its own additional 256MB (28-bit) MMIO-extension space.
*/
- struct {
- /** @mmio_ext.size: size of tile's additional MMIO-extension space */
- size_t size;
-
- /** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */
- void __iomem *regs;
- } mmio_ext;
+ struct xe_mmio mmio_ext;
/** @mem: memory management info for tile */
struct {
@@ -200,14 +226,14 @@ struct xe_tile {
struct xe_lmtt lmtt;
} pf;
struct {
- /** @sriov.vf.memirq: Memory Based Interrupts. */
- struct xe_memirq memirq;
-
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
struct xe_ggtt_node *ggtt_balloon[2];
} vf;
} sriov;
+ /** @memirq: Memory Based Interrupts. */
+ struct xe_memirq memirq;
+
/** @pcode: tile's PCODE */
struct {
/** @pcode.lock: protecting tile's PCODE mailbox data */
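As an aside, here is a minimal sketch of how the new struct xe_mmio scoping introduced above is consumed: both reads mirror accesses made elsewhere in this series (GMD_ID through a GT's region, VF_CAP_REG through the root tile's region), while the wrapper function itself is hypothetical and not part of the patch.

/* Sketch only: picking a GT-scoped vs. tile-scoped MMIO region explicitly. */
static void mmio_scope_sketch(struct xe_device *xe, struct xe_gt *gt)
{
	/* GT-scoped read; offsets below adj_limit are remapped for media GTs. */
	u32 gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);

	/* Tile-scoped read through the root tile's view of the same iomap. */
	u32 vf_cap = xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);

	drm_dbg(&xe->drm, "GMD_ID=%#x VF_CAP_REG=%#x\n", gmdid, vf_cap);
}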
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index fb52a23e28f8..22f0f1a6dfd5 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -278,6 +278,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
struct xe_hw_engine *hwe;
struct xe_exec_queue *q;
u64 gpu_timestamp;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
@@ -303,13 +304,16 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
continue;
fw = xe_hw_engine_to_fw_domain(hwe);
- if (xe_force_wake_get(gt_to_fw(gt), fw)) {
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), fw);
+ if (!xe_force_wake_ref_has_domain(fw_ref, fw)) {
hwe = NULL;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
break;
}
gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
break;
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 7deb480e26af..1158b6062a6c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -143,7 +143,7 @@ struct xe_exec_queue {
/** @hw_engine_group_link: link into exec queues in the same hw engine group */
struct list_head hw_engine_group_link;
/** @lrc: logical ring context for this exec queue */
- struct xe_lrc *lrc[];
+ struct xe_lrc *lrc[] __counted_by(width);
};
/**
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 6a59165b9569..a8c416a48812 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -44,6 +44,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
u32 ctx_id)
{
struct xe_gt *gt = hwe->gt;
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = gt_to_xe(gt);
u64 lrc_desc;
@@ -58,7 +59,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
}
if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
- xe_mmio_write32(hwe->gt, RCU_MODE,
+ xe_mmio_write32(mmio, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
@@ -76,17 +77,17 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
*/
wmb();
- xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
xe_bo_ggtt_addr(hwe->hwsp));
- xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
- xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
+ xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
+ xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
- xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
lower_32_bits(lrc_desc));
- xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
upper_32_bits(lrc_desc));
- xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
EL_CTRL_LOAD);
}
@@ -168,8 +169,8 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe)
struct xe_gt *gt = hwe->gt;
u32 hi, lo;
- lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
- hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
+ lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
+ hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
return lo | (u64)hi << 32;
}
@@ -312,7 +313,7 @@ execlist_run_job(struct drm_sched_job *drm_job)
q->ring_ops->emit_job(job);
xe_execlist_make_active(exl);
- return dma_fence_get(job->fence);
+ return job->fence;
}
static void execlist_job_free(struct drm_sched_job *drm_job)
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 7d9fc489dcb8..4f6784e5abf8 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -21,15 +21,25 @@ static const char *str_wake_sleep(bool wake)
return wake ? "wake" : "sleep";
}
-static void domain_init(struct xe_force_wake_domain *domain,
+static void mark_domain_initialized(struct xe_force_wake *fw,
+ enum xe_force_wake_domain_id id)
+{
+ fw->initialized_domains |= BIT(id);
+}
+
+static void init_domain(struct xe_force_wake *fw,
enum xe_force_wake_domain_id id,
struct xe_reg reg, struct xe_reg ack)
{
+ struct xe_force_wake_domain *domain = &fw->domains[id];
+
domain->id = id;
domain->reg_ctl = reg;
domain->reg_ack = ack;
domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL);
domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL);
+
+ mark_domain_initialized(fw, id);
}
void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
@@ -43,13 +53,11 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (xe->info.graphics_verx100 >= 1270) {
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
- XE_FW_DOMAIN_ID_GT,
+ init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
FORCEWAKE_ACK_GT_MTL);
} else {
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
- XE_FW_DOMAIN_ID_GT,
+ init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
FORCEWAKE_ACK_GT);
}
@@ -63,8 +71,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (!xe_gt_is_media_type(gt))
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
- XE_FW_DOMAIN_ID_RENDER,
+ init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER,
FORCEWAKE_ACK_RENDER);
@@ -72,8 +79,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
if (!(gt->info.engine_mask & BIT(i)))
continue;
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
- XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+ init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
FORCEWAKE_MEDIA_VDBOX(j),
FORCEWAKE_ACK_MEDIA_VDBOX(j));
}
@@ -82,15 +88,13 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
if (!(gt->info.engine_mask & BIT(i)))
continue;
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
- XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+ init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
FORCEWAKE_MEDIA_VEBOX(j),
FORCEWAKE_ACK_MEDIA_VEBOX(j));
}
if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC],
- XE_FW_DOMAIN_ID_GSC,
+ init_domain(fw, XE_FW_DOMAIN_ID_GSC,
FORCEWAKE_GSC,
FORCEWAKE_ACK_GSC);
}
@@ -100,7 +104,7 @@ static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain,
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
- xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0));
+ xe_mmio_write32(&gt->mmio, domain->reg_ctl, domain->mask | (wake ? domain->val : 0));
}
static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake)
@@ -111,7 +115,7 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain,
if (IS_SRIOV_VF(gt_to_xe(gt)))
return 0;
- ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0,
+ ret = xe_mmio_wait32(&gt->mmio, domain->reg_ack, domain->val, wake ? domain->val : 0,
XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
&value, true);
if (ret)
@@ -156,52 +160,108 @@ static int domain_sleep_wait(struct xe_gt *gt,
(ffs(tmp__) - 1))) && \
domain__->reg_ctl.addr)
-int xe_force_wake_get(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains)
+/**
+ * xe_force_wake_get(): Increase the domain refcount
+ * @fw: struct xe_force_wake
+ * @domains: forcewake domains to get a refcount on
+ *
+ * This function wakes up @domains if they are asleep and takes references.
+ * If the requested domain is XE_FORCEWAKE_ALL, only the applicable/initialized
+ * domains are refcounted, and it is the caller's responsibility to check
+ * whether the returned reference includes a specific domain using
+ * xe_force_wake_ref_has_domain(). The caller must call xe_force_wake_put()
+ * to release the references taken here.
+ *
+ * Return: opaque reference to the woken domains, or zero if none of the
+ * requested domains could be woken.
+ */
+unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains)
{
struct xe_gt *gt = fw->gt;
struct xe_force_wake_domain *domain;
- enum xe_force_wake_domains tmp, woken = 0;
+ unsigned int ref_incr = 0, awake_rqst = 0, awake_failed = 0;
+ unsigned int tmp, ref_rqst;
unsigned long flags;
- int ret = 0;
+ xe_gt_assert(gt, is_power_of_2(domains));
+ xe_gt_assert(gt, domains <= XE_FORCEWAKE_ALL);
+ xe_gt_assert(gt, domains == XE_FORCEWAKE_ALL || fw->initialized_domains & domains);
+
+ ref_rqst = (domains == XE_FORCEWAKE_ALL) ? fw->initialized_domains : domains;
spin_lock_irqsave(&fw->lock, flags);
- for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ for_each_fw_domain_masked(domain, ref_rqst, fw, tmp) {
if (!domain->ref++) {
- woken |= BIT(domain->id);
+ awake_rqst |= BIT(domain->id);
domain_wake(gt, domain);
}
+ ref_incr |= BIT(domain->id);
}
- for_each_fw_domain_masked(domain, woken, fw, tmp) {
- ret |= domain_wake_wait(gt, domain);
+ for_each_fw_domain_masked(domain, awake_rqst, fw, tmp) {
+ if (domain_wake_wait(gt, domain) == 0) {
+ fw->awake_domains |= BIT(domain->id);
+ } else {
+ awake_failed |= BIT(domain->id);
+ --domain->ref;
+ }
}
- fw->awake_domains |= woken;
+ ref_incr &= ~awake_failed;
spin_unlock_irqrestore(&fw->lock, flags);
- return ret;
+ xe_gt_WARN(gt, awake_failed, "Forcewake domain%s %#x failed to acknowledge awake request\n",
+ str_plural(hweight_long(awake_failed)), awake_failed);
+
+ if (domains == XE_FORCEWAKE_ALL && ref_incr == fw->initialized_domains)
+ ref_incr |= XE_FORCEWAKE_ALL;
+
+ return ref_incr;
}
-int xe_force_wake_put(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains)
+/**
+ * xe_force_wake_put - Decrement the refcount and put the domain to sleep if the refcount reaches 0
+ * @fw: Pointer to the force wake structure
+ * @fw_ref: reference returned by xe_force_wake_get()
+ *
+ * This function decrements the reference counts for the domains in @fw_ref.
+ * If the refcount for any of those domains reaches 0, the domain is put to
+ * sleep and the function waits up to 50 ms for the domain to acknowledge the
+ * sleep request, warning on timeout.
+ */
+void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref)
{
struct xe_gt *gt = fw->gt;
struct xe_force_wake_domain *domain;
- enum xe_force_wake_domains tmp, sleep = 0;
+ unsigned int tmp, sleep = 0;
unsigned long flags;
- int ret = 0;
+ int ack_fail = 0;
+
+ /*
+ * Avoid an unnecessary lock/unlock when the function is called
+ * from the error path of individual domains.
+ */
+ if (!fw_ref)
+ return;
+
+ if (xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ fw_ref = fw->initialized_domains;
spin_lock_irqsave(&fw->lock, flags);
- for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ for_each_fw_domain_masked(domain, fw_ref, fw, tmp) {
+ xe_gt_assert(gt, domain->ref);
+
if (!--domain->ref) {
sleep |= BIT(domain->id);
domain_sleep(gt, domain);
}
}
for_each_fw_domain_masked(domain, sleep, fw, tmp) {
- ret |= domain_sleep_wait(gt, domain);
+ if (domain_sleep_wait(gt, domain) == 0)
+ fw->awake_domains &= ~BIT(domain->id);
+ else
+ ack_fail |= BIT(domain->id);
}
- fw->awake_domains &= ~sleep;
spin_unlock_irqrestore(&fw->lock, flags);
- return ret;
+ xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n",
+ str_plural(hweight_long(ack_fail)), ack_fail);
}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index a2577672f4e3..0e3e84bfa51c 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -15,10 +15,9 @@ void xe_force_wake_init_gt(struct xe_gt *gt,
struct xe_force_wake *fw);
void xe_force_wake_init_engines(struct xe_gt *gt,
struct xe_force_wake *fw);
-int xe_force_wake_get(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains);
-int xe_force_wake_put(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains);
+unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains);
+void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref);
static inline int
xe_force_wake_ref(struct xe_force_wake *fw,
@@ -46,4 +45,20 @@ xe_force_wake_assert_held(struct xe_force_wake *fw,
xe_gt_assert(fw->gt, fw->awake_domains & domain);
}
+/**
+ * xe_force_wake_ref_has_domain - verifies whether the domain is in @fw_ref
+ * @fw_ref: the force_wake reference
+ * @domain: forcewake domain to verify
+ *
+ * This function confirms whether the @fw_ref includes a reference to the
+ * specified @domain.
+ *
+ * Return: true if domain is refcounted.
+ */
+static inline bool
+xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain)
+{
+ return fw_ref & domain;
+}
+
#endif
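A minimal caller-side sketch of the reworked forcewake API declared above, following the pattern used by the callers converted in this series; the function itself is hypothetical. The opaque reference returned by xe_force_wake_get() is checked with xe_force_wake_ref_has_domain() when all domains are required, and the same reference is handed back to xe_force_wake_put().

/* Sketch only: hypothetical caller of the reworked forcewake API. */
static int fw_usage_sketch(struct xe_gt *gt)
{
	unsigned int fw_ref;

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		/* Partial wake: release whatever was taken and bail out. */
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
		return -ETIMEDOUT;
	}

	/* ... register accesses that need all forcewake domains ... */

	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	return 0;
}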
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index ed0edc2cdf9f..899fbbcb3ea9 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -48,7 +48,7 @@ enum xe_force_wake_domains {
XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC),
- XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+ XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT)
};
/**
@@ -78,7 +78,9 @@ struct xe_force_wake {
/** @lock: protects everything force wake struct */
spinlock_t lock;
/** @awake_domains: mask of all domains awake */
- enum xe_force_wake_domains awake_domains;
+ unsigned int awake_domains;
+ /** @initialized_domains: mask of all initialized domains */
+ unsigned int initialized_domains;
/** @domains: force wake domains */
struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
};
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ff19eca5d358..558fac8bb6fb 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -5,6 +5,7 @@
#include "xe_ggtt.h"
+#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>
@@ -107,8 +108,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
- struct xe_gt *gt = XE_WA(ggtt->tile->primary_gt, 22019338487) ? ggtt->tile->primary_gt :
- ggtt->tile->media_gt;
+ struct xe_tile *tile = ggtt->tile;
+ struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ?
+ tile->primary_gt : tile->media_gt;
+ struct xe_mmio *mmio = &affected_gt->mmio;
u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63;
/*
* Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
@@ -118,7 +121,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
lockdep_assert_held(&ggtt->lock);
if ((++ggtt->access_count % max_gtt_writes) == 0) {
- xe_mmio_write32(gt, GMD_ID, 0x0);
+ xe_mmio_write32(mmio, GMD_ID, 0x0);
ggtt->access_count = 0;
}
}
@@ -243,7 +246,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
else
ggtt->pt_ops = &xelp_pt_ops;
- ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
+ ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
ggtt->size - xe_wopcm_size(xe));
@@ -262,6 +265,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
@@ -405,7 +409,7 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
* vs. correct GGTT page. Not particularly a hot code path so blindly
* do a mmio read here which results in GuC reading correct GGTT page.
*/
- xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+ xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);
/* Each GT in a tile has its own TLB to cache GGTT lookups */
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
@@ -609,7 +613,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end)
{
int err;
- u64 alignment = XE_PAGE_SIZE;
+ u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
alignment = SZ_64K;
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 6fbea70d3d36..1eb791ddc375 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -34,6 +34,7 @@
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_irq_regs.h"
static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
@@ -179,7 +180,7 @@ out_bo:
static int gsc_fw_is_loaded(struct xe_gt *gt)
{
- return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
+ return xe_mmio_read32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
HECI1_FWSTS1_INIT_COMPLETE;
}
@@ -190,7 +191,7 @@ static int gsc_fw_wait(struct xe_gt *gt)
* executed by the GSCCS. To account for possible submission delays or
* other issues, we use a 500ms timeout in the wait here.
*/
- return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+ return xe_mmio_wait32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
HECI1_FWSTS1_INIT_COMPLETE,
HECI1_FWSTS1_INIT_COMPLETE,
500 * USEC_PER_MSEC, NULL, false);
@@ -260,19 +261,17 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_tile *tile = gt_to_tile(gt);
+ unsigned int fw_ref;
int ret;
if (XE_WA(tile->primary_gt, 14018094691)) {
- ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+ fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
/*
* If the forcewake fails we want to keep going, because the worst
* case outcome in failing to apply the WA is that PXP won't work,
- * which is not fatal. We still throw a warning so the issue is
- * seen if it happens.
+ * which is not fatal. Forcewake get warns implicitly in case of failure.
*/
- xe_gt_WARN_ON(tile->primary_gt, ret);
-
xe_gt_mcr_multicast_write(tile->primary_gt,
EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
@@ -281,7 +280,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
ret = gsc_upload(gsc);
if (XE_WA(tile->primary_gt, 14018094691))
- xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref);
if (ret)
return ret;
@@ -330,7 +329,7 @@ static int gsc_er_complete(struct xe_gt *gt)
* so in that scenario we're always guaranteed to find the correct
* value.
*/
- er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
+ er_status = xe_mmio_read32(&gt->mmio, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
/*
@@ -351,6 +350,7 @@ static void gsc_work(struct work_struct *work)
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
u32 actions;
int ret;
@@ -360,7 +360,7 @@ static void gsc_work(struct work_struct *work)
spin_unlock_irq(&gsc->lock);
xe_pm_runtime_get(xe);
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
if (actions & GSC_ACTION_ER_COMPLETE) {
ret = gsc_er_complete(gt);
@@ -380,7 +380,7 @@ static void gsc_work(struct work_struct *work)
xe_gsc_proxy_request_handler(gsc);
out:
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
}
@@ -581,11 +581,11 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
return;
- xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
+ xe_mmio_rmw32(&gt->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
if (prep) {
/* make sure the reset bit is clear when writing the CSR reg */
- xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ xe_mmio_rmw32(&gt->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE),
HECI_H_CSR_RST, HECI_H_CSR_IG);
msleep(200);
}
@@ -599,7 +599,8 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
{
struct xe_gt *gt = gsc_to_gt(gsc);
- int err;
+ struct xe_mmio *mmio = &gt->mmio;
+ unsigned int fw_ref;
xe_uc_fw_print(&gsc->fw, p);
@@ -608,17 +609,17 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
if (!xe_uc_fw_is_enabled(&gsc->fw))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref)
return;
drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
- xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS2(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS3(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
-
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_mmio_read32(mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS2(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS3(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
+
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 2d6ea8c01445..fc64b45d324b 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -65,7 +65,7 @@ gsc_to_gt(struct xe_gsc *gsc)
bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
- u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
+ u32 fwsts1 = xe_mmio_read32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) ==
HECI1_FWSTS1_PROXY_STATE_NORMAL;
@@ -78,7 +78,7 @@ static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
/* make sure we never accidentally write the RST bit */
clr |= HECI_H_CSR_RST;
- xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
+ xe_mmio_rmw32(&gt->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
}
static void gsc_proxy_irq_clear(struct xe_gsc *gsc)
@@ -450,22 +450,21 @@ void xe_gsc_proxy_remove(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
- int err = 0;
+ unsigned int fw_ref = 0;
if (!gsc->proxy.component_added)
return;
/* disable HECI2 IRQs */
xe_pm_runtime_get(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref)
xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
/* try do disable irq even if forcewake failed */
gsc_proxy_irq_toggle(gsc, false);
- if (!err)
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
xe_gsc_wait_for_worker_completion(gsc);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d5fd6a089b7c..d6744be01a68 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -77,7 +77,8 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
return ERR_PTR(-ENOMEM);
gt->tile = tile;
- gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+ gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq",
+ WQ_MEM_RECLAIM);
err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
if (err)
@@ -97,14 +98,14 @@ void xe_gt_sanitize(struct xe_gt *gt)
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
+ unsigned int fw_ref;
u32 reg;
- int err;
if (!XE_WA(gt, 16023588340))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (WARN_ON(err))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
if (!xe_gt_is_media_type(gt)) {
@@ -114,13 +115,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
+ unsigned int fw_ref;
u32 reg;
- int err;
if (!XE_WA(gt, 16023588340))
return;
@@ -128,15 +129,15 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
if (xe_gt_is_media_type(gt))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (WARN_ON(err))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -244,7 +245,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
val = 0;
@@ -402,11 +403,14 @@ static void dump_pat_on_error(struct xe_gt *gt)
static int gt_fw_domain_init(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err, i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref) {
+ err = -ETIMEDOUT;
goto err_hw_fence_irq;
+ }
if (!xe_gt_is_media_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
@@ -439,16 +443,14 @@ static int gt_fw_domain_init(struct xe_gt *gt)
* Stash hardware-reported version. Since this register does not exist
* on pre-MTL platforms, reading it there will (correctly) return 0.
*/
- gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
-
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
- XE_WARN_ON(err);
+ gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
err_force_wake:
dump_pat_on_error(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -458,11 +460,14 @@ err_hw_fence_irq:
static int all_fw_domain_init(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err, i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
- goto err_hw_fence_irq;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ err = -ETIMEDOUT;
+ goto err_force_wake;
+ }
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
@@ -526,14 +531,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-err_hw_fence_irq:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -546,11 +549,12 @@ err_hw_fence_irq:
*/
int xe_gt_init_hwconfig(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto out;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
@@ -568,8 +572,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_enable_host_l2_vram(gt);
out_fw:
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-out:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return err;
}
@@ -622,6 +625,30 @@ int xe_gt_init(struct xe_gt *gt)
return 0;
}
+/**
+ * xe_gt_mmio_init() - Initialize GT's MMIO access
+ * @gt: the GT object
+ *
+ * Initialize GT's MMIO accessor, which will be used to access registers inside
+ * this GT.
+ */
+void xe_gt_mmio_init(struct xe_gt *gt)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ gt->mmio.regs = tile->mmio.regs;
+ gt->mmio.regs_size = tile->mmio.regs_size;
+ gt->mmio.tile = tile;
+
+ if (gt->info.type == XE_GT_TYPE_MEDIA) {
+ gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
+ gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+ }
+
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ gt->mmio.sriov_vf_gt = gt;
+}
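A rough sketch of how the adj_limit/adj_offset pair initialized above is typically applied before an access; the helper name and exact placement are assumptions rather than part of this patch.

/* Hypothetical helper: remap a GSI register offset for a media GT region. */
static inline u32 gsi_adjust_sketch(const struct xe_mmio *mmio, u32 addr)
{
	/* Media GT GSI registers sit at a fixed offset within the tile map. */
	if (addr < mmio->adj_limit)
		addr += mmio->adj_offset;

	return addr;
}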
+
void xe_gt_record_user_engines(struct xe_gt *gt)
{
struct xe_hw_engine *hwe;
@@ -649,8 +676,8 @@ static int do_gt_reset(struct xe_gt *gt)
xe_gsc_wa_14015076503(gt, true);
- xe_mmio_write32(gt, GDRST, GRDOM_FULL);
- err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
+ xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
+ err = xe_mmio_wait32(&gt->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
if (err)
xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
ERR_PTR(err));
@@ -740,6 +767,7 @@ static int do_gt_restart(struct xe_gt *gt)
static int gt_reset(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
if (xe_device_wedged(gt_to_xe(gt)))
@@ -760,9 +788,11 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_sanitize(gt);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
- goto err_msg;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ err = -ETIMEDOUT;
+ goto err_out;
+ }
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
@@ -780,8 +810,7 @@ static int gt_reset(struct xe_gt *gt)
if (err)
goto err_out;
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
@@ -789,8 +818,7 @@ static int gt_reset(struct xe_gt *gt)
return 0;
err_out:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-err_msg:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
XE_WARN_ON(xe_uc_start(&gt->uc));
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
@@ -810,7 +838,7 @@ static void gt_reset_worker(struct work_struct *w)
void xe_gt_reset_async(struct xe_gt *gt)
{
- xe_gt_info(gt, "trying reset\n");
+ xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));
/* Don't do a reset while one is already in flight */
if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
@@ -822,22 +850,25 @@ void xe_gt_reset_async(struct xe_gt *gt)
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ unsigned int fw_ref;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_stop_prepare(&gt->uc);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
int xe_gt_suspend(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "suspending\n");
xe_gt_sanitize(gt);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_msg;
err = xe_uc_suspend(&gt->uc);
@@ -848,19 +879,29 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_disable_host_l2_vram(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "suspended\n");
return 0;
-err_force_wake:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
+ err = -ETIMEDOUT;
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
return err;
}
+void xe_gt_shutdown(struct xe_gt *gt)
+{
+ unsigned int fw_ref;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ do_gt_reset(gt);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+}
+
/**
* xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
* @gt: the GT object
@@ -883,11 +924,12 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int xe_gt_resume(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "resuming\n");
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_msg;
err = do_gt_restart(gt);
@@ -896,14 +938,15 @@ int xe_gt_resume(struct xe_gt *gt)
xe_gt_idle_enable_pg(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "resumed\n");
return 0;
-err_force_wake:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
+ err = -ETIMEDOUT;
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index ee138e9768a2..82b9b7f82fca 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -31,6 +31,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
int xe_gt_init_hwconfig(struct xe_gt *gt);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);
+void xe_gt_mmio_init(struct xe_gt *gt);
void xe_gt_declare_wedged(struct xe_gt *gt);
int xe_gt_record_default_lrcs(struct xe_gt *gt);
@@ -48,6 +49,7 @@ void xe_gt_record_user_engines(struct xe_gt *gt);
void xe_gt_suspend_prepare(struct xe_gt *gt);
int xe_gt_suspend(struct xe_gt *gt);
+void xe_gt_shutdown(struct xe_gt *gt);
int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index ffcbd05671fc..b6adfb9f2030 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -74,7 +74,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
* platforms as these bits are unused there.
*/
mode |= CCS_MODE_CSLICE_0_3_MASK << 16;
- xe_mmio_write32(gt, CCS_MODE, mode);
+ xe_mmio_write32(&gt->mmio, CCS_MODE, mode);
xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
mode, config, num_engines, num_slices);
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 86c2d62b4bdc..cc2ae159298e 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -17,7 +17,7 @@
static u32 read_reference_ts_freq(struct xe_gt *gt)
{
- u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE);
+ u32 ts_override = xe_mmio_read32(&gt->mmio, TIMESTAMP_OVERRIDE);
u32 base_freq, frac_freq;
base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
@@ -57,7 +57,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
int xe_gt_clock_init(struct xe_gt *gt)
{
- u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE);
+ u32 ctc_reg = xe_mmio_read32(&gt->mmio, CTC_MODE);
u32 freq = 0;
/* Assuming gen11+ so assert this assumption is correct */
@@ -66,7 +66,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
freq = read_reference_ts_freq(gt);
} else {
- u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0);
+ u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
freq = get_crystal_clock_freq(c0);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 8f95d3a5949b..3e8c351a0eab 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -15,6 +15,7 @@
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
+#include "xe_gt_idle.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_vf_debugfs.h"
#include "xe_gt_stats.h"
@@ -89,26 +90,36 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- int err;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
xe_pm_runtime_put(xe);
- return err;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
}
for_each_hw_engine(hwe, gt, id)
xe_hw_engine_print(hwe, p);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
- if (err)
- return err;
return 0;
}
+static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
+{
+ int ret;
+
+ xe_pm_runtime_get(gt_to_xe(gt));
+ ret = xe_gt_idle_pg_print(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return ret;
+}
+
static int force_reset(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
@@ -288,6 +299,7 @@ static const struct drm_info_list debugfs_list[] = {
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
{"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
+ {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index ab76973f3e1e..6bd39b2c5003 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -11,9 +11,9 @@
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
-#include "xe_device_types.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
+#include "xe_gt_types.h"
#include "xe_guc_pc.h"
#include "xe_pm.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 67aba4140510..fd80afeef56a 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -98,7 +98,10 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
void xe_gt_idle_enable_pg(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 pg_enable;
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ struct xe_mmio *mmio = &gt->mmio;
+ u32 vcs_mask, vecs_mask;
+ unsigned int fw_ref;
int i, j;
if (IS_SRIOV_VF(xe))
@@ -110,39 +113,136 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
xe_device_assert_mem_access(gt_to_xe(gt));
- pg_enable = RENDER_POWERGATE_ENABLE | MEDIA_POWERGATE_ENABLE;
+ vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+
+ if (vcs_mask || vecs_mask)
+ gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE;
+
+ if (!xe_gt_is_media_type(gt))
+ gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
if ((gt->info.engine_mask & BIT(i)))
- pg_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
- VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(j));
}
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (xe->info.skip_guc_pc) {
/*
* GuC sets the hysteresis value when GuC PC is enabled
* else set it to 25 (25 * 1.28us)
*/
- xe_mmio_write32(gt, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25);
- xe_mmio_write32(gt, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
+ xe_mmio_write32(mmio, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25);
+ xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
}
- xe_mmio_write32(gt, POWERGATE_ENABLE, pg_enable);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
void xe_gt_idle_disable_pg(struct xe_gt *gt)
{
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ unsigned int fw_ref;
+
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
xe_device_assert_mem_access(gt_to_xe(gt));
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ gtidle->powergate_enable = 0;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+}
+
+/**
+ * xe_gt_idle_pg_print - Xe powergating info
+ * @gt: GT object
+ * @p: drm_printer.
+ *
+ * This function prints the powergating information.
+ *
+ * Return: 0 on success, negative error code otherwise
+ */
+int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ struct xe_device *xe = gt_to_xe(gt);
+ enum xe_gt_idle_state state;
+ u32 pg_enabled, pg_status = 0;
+ u32 vcs_mask, vecs_mask;
+ unsigned int fw_ref;
+ int n;
+ /*
+ * Media Slices
+ *
+ * Slice 0: VCS0, VCS1, VECS0
+ * Slice 1: VCS2, VCS3, VECS1
+ * Slice 2: VCS4, VCS5, VECS2
+ * Slice 3: VCS6, VCS7, VECS3
+ */
+ static const struct {
+ u64 engines;
+ u32 status_bit;
+ } media_slices[] = {
+ {(BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) |
+ BIT(XE_HW_ENGINE_VECS0)), MEDIA_SLICE0_AWAKE_STATUS},
+
+ {(BIT(XE_HW_ENGINE_VCS2) | BIT(XE_HW_ENGINE_VCS3) |
+ BIT(XE_HW_ENGINE_VECS1)), MEDIA_SLICE1_AWAKE_STATUS},
- xe_mmio_write32(gt, POWERGATE_ENABLE, 0);
+ {(BIT(XE_HW_ENGINE_VCS4) | BIT(XE_HW_ENGINE_VCS5) |
+ BIT(XE_HW_ENGINE_VECS2)), MEDIA_SLICE2_AWAKE_STATUS},
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ {(BIT(XE_HW_ENGINE_VCS6) | BIT(XE_HW_ENGINE_VCS7) |
+ BIT(XE_HW_ENGINE_VECS3)), MEDIA_SLICE3_AWAKE_STATUS},
+ };
+
+ if (xe->info.platform == XE_PVC) {
+ drm_printf(p, "Power Gating not supported\n");
+ return 0;
+ }
+
+ state = gtidle->idle_status(gtidle_to_pc(gtidle));
+ pg_enabled = gtidle->powergate_enable;
+
+ /* Do not wake the GT to read powergating status */
+ if (state != GT_IDLE_C6) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
+
+ pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE);
+ pg_status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS);
+
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ }
+
+ if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) {
+ drm_printf(p, "Render Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & RENDER_POWERGATE_ENABLE));
+
+ drm_printf(p, "Render Power Gate Status: %s\n",
+ str_up_down(pg_status & RENDER_AWAKE_STATUS));
+ }
+
+ vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+
+ /* Print media CPG status only if media is present */
+ if (vcs_mask || vecs_mask) {
+ drm_printf(p, "Media Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & MEDIA_POWERGATE_ENABLE));
+
+ for (n = 0; n < ARRAY_SIZE(media_slices); n++)
+ if (gt->info.engine_mask & media_slices[n].engines)
+ drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
+ str_up_down(pg_status & media_slices[n].status_bit));
+ }
+ return 0;
}
static ssize_t name_show(struct device *dev,
@@ -201,13 +301,14 @@ static void gt_idle_fini(void *arg)
{
struct kobject *kobj = arg;
struct xe_gt *gt = kobj_to_gt(kobj->parent);
+ unsigned int fw_ref;
xe_gt_idle_disable_pg(gt);
if (gt_to_xe(gt)->info.skip_guc_pc) {
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
xe_gt_idle_disable_c6(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
sysfs_remove_files(kobj, gt_idle_attrs);
@@ -260,9 +361,9 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
return;
/* Units of 1280 ns for a total of 5s */
- xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+ xe_mmio_write32(&gt->mmio, RC_IDLE_HYSTERSIS, 0x3B9ACA);
/* Enable RC6 */
- xe_mmio_write32(gt, RC_CONTROL,
+ xe_mmio_write32(&gt->mmio, RC_CONTROL,
RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
}
@@ -274,6 +375,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt)
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
- xe_mmio_write32(gt, RC_CONTROL, 0);
- xe_mmio_write32(gt, RC_STATE, 0);
+ xe_mmio_write32(&gt->mmio, RC_CONTROL, 0);
+ xe_mmio_write32(&gt->mmio, RC_STATE, 0);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 554447b5d46d..4455a6501cb0 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -8,6 +8,7 @@
#include "xe_gt_idle_types.h"
+struct drm_printer;
struct xe_gt;
int xe_gt_idle_init(struct xe_gt_idle *gtidle);
@@ -15,5 +16,6 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt);
void xe_gt_idle_disable_c6(struct xe_gt *gt);
void xe_gt_idle_enable_pg(struct xe_gt *gt);
void xe_gt_idle_disable_pg(struct xe_gt *gt);
+int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p);
#endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
index f99b447534f3..b8b297a3f884 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -23,6 +23,8 @@ enum xe_gt_idle_state {
struct xe_gt_idle {
/** @name: name */
char name[16];
+ /** @powergate_enable: copy of powergate enable bits */
+ u32 powergate_enable;
/** @residency_multiplier: residency multiplier in ns */
u32 residency_multiplier;
/** @cur_residency: raw driver copy of idle residency */
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index c834f64b0178..5013d674e17d 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -237,13 +237,26 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
{},
};
+static const struct xe_mmio_range xe3lpm_instance0_steering_table[] = {
+ { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */
+ { 0x384900, 0x384AFF }, /* GAM */
+ { 0x389560, 0x3895FF }, /* MEDIAINF */
+ { 0x38B600, 0x38B8FF }, /* L3BANK */
+ { 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */
+ { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, GAM */
+ { 0x393C00, 0x393C7F }, /* MEDIAINF */
+ {},
+};
+
static void init_steering_l3bank(struct xe_gt *gt)
{
+ struct xe_mmio *mmio = &gt->mmio;
+
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
- xe_mmio_read32(gt, XEHP_FUSE4));
+ xe_mmio_read32(mmio, XEHP_FUSE4));
/*
* Group selects mslice, instance selects bank within mslice.
@@ -254,7 +267,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
bank_mask & BIT(0) ? 0 : 2;
} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank = __ffs(mslice_mask) * 8;
/*
@@ -266,7 +279,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
gt->steering[L3BANK].instance_target = bank & 0x3;
} else {
u32 fuse = REG_FIELD_GET(L3BANK_MASK,
- ~xe_mmio_read32(gt, MIRROR_FUSE3));
+ ~xe_mmio_read32(mmio, MIRROR_FUSE3));
gt->steering[L3BANK].group_target = 0; /* unused */
gt->steering[L3BANK].instance_target = __ffs(fuse);
@@ -276,7 +289,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
static void init_steering_mslice(struct xe_gt *gt)
{
u32 mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
/*
* mslice registers are valid (not terminated) if either the meml3
@@ -352,6 +365,19 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
*instance = dss % gt->steering_dss_per_grp;
}
+/**
+ * xe_gt_mcr_steering_info_to_dss_id - Get DSS ID from group/instance steering
+ * @gt: GT structure
+ * @group: steering group ID
+ * @instance: steering instance ID
+ *
+ * Return: the converted DSS ID.
+ */
+u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance)
+{
+ return group * dss_per_group(gt) + instance;
+}
+
static void init_steering_dss(struct xe_gt *gt)
{
gt->steering_dss_per_grp = dss_per_group(gt);
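A worked example of the new helper (the group size here is illustrative only): with dss_per_group(gt) == 4, steering info (group 2, instance 1) converts to DSS 2 * 4 + 1 = 9, i.e. the inverse of the mapping computed by xe_gt_mcr_get_dss_steering() above.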
@@ -380,7 +406,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
static void init_steering_sqidi_psmi(struct xe_gt *gt)
{
u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
u32 select = __ffs(mask);
gt->steering[SQIDI_PSMI].group_target = select >> 1;
@@ -439,7 +465,10 @@ void xe_gt_mcr_init(struct xe_gt *gt)
if (gt->info.type == XE_GT_TYPE_MEDIA) {
drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
- if (MEDIA_VERx100(xe) >= 1301) {
+ if (MEDIA_VER(xe) >= 30) {
+ gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
+ gt->steering[INSTANCE0].ranges = xe3lpm_instance0_steering_table;
+ } else if (MEDIA_VERx100(xe) >= 1301) {
gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
} else {
@@ -494,8 +523,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
- xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
- xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, MCFG_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, SF_MCR_SELECTOR, steer_val);
/*
* For GAM registers, all reads should be directed to instance 1
* (unicast reads against other instances are not allowed),
@@ -533,7 +562,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
continue;
for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
- if (xe_mmio_in_range(gt, &gt->steering[type].ranges[i], reg)) {
+ if (xe_mmio_in_range(&gt->mmio, &gt->steering[type].ranges[i], reg)) {
*group = gt->steering[type].group_target;
*instance = gt->steering[type].instance_target;
return true;
@@ -544,7 +573,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
if (implicit_ranges)
for (int i = 0; implicit_ranges[i].end > 0; i++)
- if (xe_mmio_in_range(gt, &implicit_ranges[i], reg))
+ if (xe_mmio_in_range(&gt->mmio, &implicit_ranges[i], reg))
return false;
/*
@@ -579,7 +608,7 @@ static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
* when a read to the relevant register returns 1.
*/
if (GRAPHICS_VERx100(xe) >= 1270)
- ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
+ ret = xe_mmio_wait32(&gt->mmio, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
true);
drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
@@ -589,7 +618,7 @@ static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
{
/* Release hardware semaphore - this is done by writing 1 to the register */
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
- xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+ xe_mmio_write32(&gt->mmio, STEER_SEMAPHORE, 0x1);
spin_unlock(&gt->mcr_lock);
}
@@ -603,6 +632,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
u8 rw_flag, int group, int instance, u32 value)
{
const struct xe_reg reg = to_xe_reg(reg_mcr);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_reg steer_reg;
u32 steer_val, val = 0;
@@ -635,12 +665,12 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
if (rw_flag == MCR_OP_READ)
steer_val |= MCR_MULTICAST;
- xe_mmio_write32(gt, steer_reg, steer_val);
+ xe_mmio_write32(mmio, steer_reg, steer_val);
if (rw_flag == MCR_OP_READ)
- val = xe_mmio_read32(gt, reg);
+ val = xe_mmio_read32(mmio, reg);
else
- xe_mmio_write32(gt, reg, value);
+ xe_mmio_write32(mmio, reg, value);
/*
* If we turned off the multicast bit (during a write) we're required
@@ -649,7 +679,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
* operation.
*/
if (rw_flag == MCR_OP_WRITE)
- xe_mmio_write32(gt, steer_reg, MCR_MULTICAST);
+ xe_mmio_write32(mmio, steer_reg, MCR_MULTICAST);
return val;
}
@@ -684,7 +714,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
group, instance, 0);
mcr_unlock(gt);
} else {
- val = xe_mmio_read32(gt, reg);
+ val = xe_mmio_read32(&gt->mmio, reg);
}
return val;
@@ -757,7 +787,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
* to touch the steering register.
*/
mcr_lock(gt);
- xe_mmio_write32(gt, reg, value);
+ xe_mmio_write32(&gt->mmio, reg, value);
mcr_unlock(gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index 8d119a0d5493..c0cd36021c24 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -28,6 +28,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
/*
* Loop over each DSS and determine the group and instance IDs that
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index d6228baaff1e..5dc71394372d 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -8,7 +8,7 @@
#include <drm/drm_print.h>
-#include "xe_device_types.h"
+#include "xe_gt_types.h"
#define xe_gt_printk(_gt, _level, _fmt, ...) \
drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 905f409db74b..e71fc3d2bda2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -5,12 +5,15 @@
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
#include "regs/xe_regs.h"
+#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_mmio.h"
@@ -72,7 +75,7 @@ static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
static void pf_enable_ggtt_guest_update(struct xe_gt *gt)
{
- xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
+ xe_mmio_write32(&gt->mmio, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
}
/**
@@ -87,6 +90,57 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
pf_enable_ggtt_guest_update(gt);
xe_gt_sriov_pf_service_update(gt);
+ xe_gt_sriov_pf_migration_init(gt);
+}
+
+static u32 pf_get_vf_regs_stride(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
+}
+
+static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride)
+{
+ struct xe_reg pf_reg = vf_reg;
+
+ pf_reg.vf = 0;
+ pf_reg.addr += stride * vfid;
+
+ return pf_reg;
+}
+
+static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt));
+ struct xe_reg scratch;
+ int n, count;
+
+ if (xe_gt_is_media_type(gt)) {
+ count = MED_VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(&gt->mmio, scratch, 0);
+ }
+ } else {
+ count = VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(&gt->mmio, scratch, 0);
+ }
+ }
+}
+
+/**
+ * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ pf_clear_vf_scratch_regs(gt, vfid);
}
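To make the scratch-register mapping concrete (stride values taken from pf_get_vf_regs_stride() above): on a GRAPHICS_VERx100 > 1200 platform the PF's view of VF2's VF_SW_FLAG(0) sits at VF_SW_FLAG(0).addr + 2 * 0x400, i.e. 0x800 past the VF-relative address, while older platforms space the per-VF copies 0x1000 apart; xe_gt_sriov_pf_sanitize_hw() simply zeroes every such register for the given VF.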
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f0cb726a6919..96fab779a906 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -11,6 +11,7 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index afdb477ecf83..192643d63d22 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -34,6 +34,8 @@
#include "xe_ttm_vram_mgr.h"
#include "xe_wopcm.h"
+#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
+
/*
* Return: number of KLVs that were successfully parsed and saved,
* negative error code on failure.
@@ -229,14 +231,16 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i
}
/* Return: number of configuration dwords written */
-static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
+static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
{
u32 n = 0;
if (xe_ggtt_node_allocated(config->ggtt_region)) {
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
- cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
- cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
+ cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
+ cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
+ }
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
cfg[n++] = lower_32_bits(config->ggtt_region->base.size);
@@ -247,20 +251,24 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
}
/* Return: number of configuration dwords written */
-static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config)
+static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
{
u32 n = 0;
- n += encode_config_ggtt(cfg, config);
+ n += encode_config_ggtt(cfg, config, details);
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID);
- cfg[n++] = config->begin_ctx;
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID);
+ cfg[n++] = config->begin_ctx;
+ }
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS);
cfg[n++] = config->num_ctxs;
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID);
- cfg[n++] = config->begin_db;
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID);
+ cfg[n++] = config->begin_db;
+ }
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS);
cfg[n++] = config->num_dbs;
@@ -301,7 +309,7 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
if (!cfg)
return -ENOMEM;
- num_dwords = encode_config(cfg, config);
+ num_dwords = encode_config(cfg, config, true);
xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
if (xe_gt_is_media_type(gt)) {
@@ -309,10 +317,10 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
struct xe_gt_sriov_config *other = pf_pick_vf_config(primary, vfid);
/* media-GT will never include a GGTT config */
- xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config));
+ xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true));
/* the GGTT config must be taken from the primary-GT instead */
- num_dwords += encode_config_ggtt(cfg + num_dwords, other);
+ num_dwords += encode_config_ggtt(cfg + num_dwords, other, true);
}
xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
@@ -2044,7 +2052,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
valid_all = valid_all && valid_lmem;
}
- return valid_all ? 1 : valid_any ? -ENOKEY : -ENODATA;
+ return valid_all ? 0 : valid_any ? -ENOKEY : -ENODATA;
}
/**
@@ -2071,6 +2079,174 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid)
}
/**
+ * xe_gt_sriov_pf_config_save - Save a VF provisioning config as a binary blob.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @buf: the buffer to save a config to (or NULL to query the required buffer size)
+ * @size: the size of the buffer (or 0 to query the required buffer size)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: minimum size of the buffer or the number of bytes saved,
+ * or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)
+{
+ struct xe_gt_sriov_config *config;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !(!buf ^ !size));
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ ret = pf_validate_vf_config(gt, vfid);
+ if (!size) {
+ ret = ret ? 0 : SZ_4K;
+ } else if (!ret) {
+ if (size < SZ_4K) {
+ ret = -ENOBUFS;
+ } else {
+ config = pf_pick_vf_config(gt, vfid);
+ ret = encode_config(buf, config, false) * sizeof(u32);
+ }
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return ret;
+}
+
+static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid,
+ u32 key, u32 len, const u32 *value)
+{
+ switch (key) {
+ case GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY:
+ if (len != GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_ctxs(gt, vfid, value[0]);
+
+ case GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY:
+ if (len != GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_dbs(gt, vfid, value[0]);
+
+ case GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY:
+ if (len != GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN)
+ return -EBADMSG;
+ return pf_provision_exec_quantum(gt, vfid, value[0]);
+
+ case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY:
+ if (len != GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN)
+ return -EBADMSG;
+ return pf_provision_preempt_timeout(gt, vfid, value[0]);
+
+ /* auto-generate case statements */
+#define define_threshold_key_to_provision_case(TAG, ...) \
+ case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \
+ BUILD_BUG_ON(MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) != 1u); \
+ if (len != MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG)) \
+ return -EBADMSG; \
+ return pf_provision_threshold(gt, vfid, \
+ MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG), \
+ value[0]);
+
+ MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_threshold_key_to_provision_case)
+#undef define_threshold_key_to_provision_case
+ }
+
+ if (xe_gt_is_media_type(gt))
+ return -EKEYREJECTED;
+
+ switch (key) {
+ case GUC_KLV_VF_CFG_GGTT_SIZE_KEY:
+ if (len != GUC_KLV_VF_CFG_GGTT_SIZE_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_ggtt(gt, vfid, make_u64_from_u32(value[1], value[0]));
+
+ case GUC_KLV_VF_CFG_LMEM_SIZE_KEY:
+ if (!IS_DGFX(gt_to_xe(gt)))
+ return -EKEYREJECTED;
+ if (len != GUC_KLV_VF_CFG_LMEM_SIZE_LEN)
+ return -EBADMSG;
+ return pf_provision_vf_lmem(gt, vfid, make_u64_from_u32(value[1], value[0]));
+ }
+
+ return -EKEYREJECTED;
+}
+
+static int pf_restore_vf_config(struct xe_gt *gt, unsigned int vfid,
+ const u32 *klvs, size_t num_dwords)
+{
+ int err;
+
+ while (num_dwords >= GUC_KLV_LEN_MIN) {
+ u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
+ u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+ klvs += GUC_KLV_LEN_MIN;
+ num_dwords -= GUC_KLV_LEN_MIN;
+
+ if (num_dwords < len)
+ err = -EBADMSG;
+ else
+ err = pf_restore_vf_config_klv(gt, vfid, key, len, klvs);
+
+ if (err) {
+ xe_gt_sriov_dbg(gt, "restore failed on key %#x (%pe)\n", key, ERR_PTR(err));
+ return err;
+ }
+
+ klvs += len;
+ num_dwords -= len;
+ }
+
+ return pf_validate_vf_config(gt, vfid);
+}
+
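A rough sketch of the blob layout this parser consumes, inferred from encode_config(..., false) and the GUC_KLV_0_* accessors rather than from a formal spec: the blob is a packed sequence of GuC KLVs, each a one-dword header (key plus value length in dwords) followed by that many value dwords, and because the save path encodes with details == false only size/count keys appear. For a primary GT the start of the blob would look roughly like:

	dw[0]  KLV header: key = VF_CFG_GGTT_SIZE,     len = 2
	dw[1]  GGTT size, lower 32 bits
	dw[2]  GGTT size, upper 32 bits
	dw[3]  KLV header: key = VF_CFG_NUM_CONTEXTS,  len = 1
	dw[4]  number of GuC contexts
	dw[5]  KLV header: key = VF_CFG_NUM_DOORBELLS, len = 1
	dw[6]  number of doorbells
	...    further KLVs (exec quantum, preempt timeout, LMEM size,
	       thresholds) as handled by pf_restore_vf_config_klv() above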
+/**
+ * xe_gt_sriov_pf_config_restore - Restore a VF provisioning config from a binary blob.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @buf: the buffer with config data
+ * @size: the size of the config data
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid);
+
+ if (!size)
+ return -ENODATA;
+
+ if (size % sizeof(u32))
+ return -EINVAL;
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ struct drm_printer p = xe_gt_info_printer(gt);
+
+ drm_printf(&p, "restoring VF%u config:\n", vfid);
+ xe_guc_klv_print(buf, size / sizeof(u32), &p);
+ }
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_send_vf_cfg_reset(gt, vfid);
+ if (!err) {
+ pf_release_vf_config(gt, vfid);
+ err = pf_restore_vf_config(gt, vfid, buf, size / sizeof(u32));
+ }
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return err;
+}
+
+/**
* xe_gt_sriov_pf_config_restart - Restart SR-IOV configurations after a GT reset.
* @gt: the &xe_gt
*
@@ -2203,6 +2379,41 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p)
}
/**
+ * xe_gt_sriov_pf_config_print_lmem - Print LMEM configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print LMEM allocations across all VFs.
+ * VFs without LMEM allocation are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+ const struct xe_gt_sriov_config *config;
+ char buf[10];
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 1; n <= total_vfs; n++) {
+ config = &gt->sriov.pf.vfs[n].config;
+ if (!config->lmem_obj)
+ continue;
+
+ string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2,
+ buf, sizeof(buf));
+ drm_printf(p, "VF%u:\t%zu\t(%s)\n",
+ n, config->lmem_obj->size, buf);
+ }
+
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return 0;
+}
+
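For illustration, a VF holding a 2 GiB LMEM object would appear in this dump as a line like "VF1:\t2147483648\t(2.00 GiB)" (value made up), with string_get_size() supplying the human-readable suffix.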
+/**
* xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges.
* @gt: the &xe_gt
* @p: the &drm_printer
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 42e64769f666..0c55aa40a1a7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -54,6 +54,10 @@ int xe_gt_sriov_pf_config_sanitize(struct xe_gt *gt, unsigned int vfid, long tim
int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force);
int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh);
+ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size);
+int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size);
+
bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
@@ -61,6 +65,7 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 02f7328bd6ce..1f50aec3a059 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -9,9 +9,11 @@
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
@@ -176,6 +178,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
CASE2STR(PAUSE_SEND_PAUSE);
CASE2STR(PAUSE_WAIT_GUC);
CASE2STR(PAUSE_GUC_DONE);
+ CASE2STR(PAUSE_SAVE_GUC);
CASE2STR(PAUSE_FAILED);
CASE2STR(PAUSED);
CASE2STR(RESUME_WIP);
@@ -415,6 +418,10 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
* : | : /
* : v : /
* : PAUSE_GUC_DONE o-----restart
+ * : | :
+ * : | o---<--busy :
+ * : v / / :
+ * : PAUSE_SAVE_GUC :
* : / :
* : / :
* :....o..............o...............o...........:
@@ -434,6 +441,7 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
}
}
@@ -464,12 +472,41 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
pf_enter_vf_pause_failed(gt, vfid);
}
+static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+}
+
+static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
+{
+ int err;
+
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
+ return false;
+
+ err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
+ if (err) {
+ /* retry if busy */
+ if (err == -EBUSY) {
+ pf_enter_vf_pause_save_guc(gt, vfid);
+ return true;
+ }
+ /* give up on error */
+ if (err == -EIO)
+ pf_enter_vf_mismatch(gt, vfid);
+ }
+
+ pf_enter_vf_pause_completed(gt, vfid);
+ return true;
+}
+
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
return false;
- pf_enter_vf_pause_completed(gt, vfid);
+ pf_enter_vf_pause_save_guc(gt, vfid);
return true;
}
@@ -1008,7 +1045,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
return false;
- /* XXX: placeholder */
+ xe_gt_sriov_pf_sanitize_hw(gt, vfid);
pf_enter_vf_flr_send_finish(gt, vfid);
return true;
@@ -1338,6 +1375,9 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
if (pf_exit_vf_pause_guc_done(gt, vfid))
return true;
+ if (pf_exit_vf_pause_save_guc(gt, vfid))
+ return true;
+
if (pf_exit_vf_resume_send_resume(gt, vfid))
return true;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
index 11830aafea45..f02f941b4ad2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
@@ -27,6 +27,7 @@
* @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command.
* @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits a response from the GuC.
* @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC.
+ * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
* @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
* @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
* @XE_GT_SRIOV_STATE_RESUME_WIP: indicates that a VF resume operation is in progress.
@@ -56,6 +57,7 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE,
XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC,
XE_GT_SRIOV_STATE_PAUSE_GUC_DONE,
+ XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC,
XE_GT_SRIOV_STATE_PAUSE_FAILED,
XE_GT_SRIOV_STATE_PAUSED,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 2290ddaf9594..05df4ab3514b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -17,6 +17,7 @@
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_pf_service.h"
@@ -81,6 +82,11 @@ static const struct drm_info_list pf_info[] = {
.data = xe_gt_sriov_pf_config_print_dbs,
},
{
+ "lmem_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_lmem,
+ },
+ {
"runtime_registers",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_service_print_runtime,
@@ -312,6 +318,9 @@ static const struct {
{ "stop", xe_gt_sriov_pf_control_stop_vf },
{ "pause", xe_gt_sriov_pf_control_pause_vf },
{ "resume", xe_gt_sriov_pf_control_resume_vf },
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+ { "restore!", xe_gt_sriov_pf_migration_restore_guc_state },
+#endif
};
static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
@@ -375,6 +384,119 @@ static const struct file_operations control_ops = {
.llseek = default_llseek,
};
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   │   ├── guc_state
+ */
+static ssize_t guc_state_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+
+ return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos);
+}
+
+static ssize_t guc_state_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+
+ if (*pos)
+ return -EINVAL;
+
+ return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count);
+}
+
+static const struct file_operations guc_state_ops = {
+ .owner = THIS_MODULE,
+ .read = guc_state_read,
+ .write = guc_state_write,
+ .llseek = default_llseek,
+};
+
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   │   ├── config_blob
+ */
+static ssize_t config_blob_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+ ssize_t ret;
+ void *tmp;
+
+ ret = xe_gt_sriov_pf_config_save(gt, vfid, NULL, 0);
+ if (!ret)
+ return -ENODATA;
+ if (ret < 0)
+ return ret;
+
+ tmp = kzalloc(ret, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ ret = xe_gt_sriov_pf_config_save(gt, vfid, tmp, ret);
+ if (ret > 0)
+ ret = simple_read_from_buffer(buf, count, pos, tmp, ret);
+
+ kfree(tmp);
+ return ret;
+}
+
+static ssize_t config_blob_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+ ssize_t ret;
+ void *tmp;
+
+ if (*pos)
+ return -EINVAL;
+
+ if (!count)
+ return -ENODATA;
+
+ if (count > SZ_4K)
+ return -EINVAL;
+
+ tmp = kzalloc(count, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (copy_from_user(tmp, buf, count)) {
+ ret = -EFAULT;
+ } else {
+ ret = xe_gt_sriov_pf_config_restore(gt, vfid, tmp, count);
+ if (!ret)
+ ret = count;
+ }
+ kfree(tmp);
+ return ret;
+}
+
+static const struct file_operations config_blob_ops = {
+ .owner = THIS_MODULE,
+ .read = config_blob_read,
+ .write = config_blob_write,
+ .llseek = default_llseek,
+};
+
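For completeness, a tiny userspace sketch of pulling the blob through the node added above; the debugfs path follows the layout comment in this patch, while the card/GT/VF indices and the error handling are assumptions:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char blob[4096];	/* the PF caps the blob at SZ_4K */
		int fd = open("/sys/kernel/debug/dri/0/gt0/vf1/config_blob", O_RDONLY);
		ssize_t n;

		if (fd < 0)
			return 1;
		n = read(fd, blob, sizeof(blob));
		if (n > 0)
			fprintf(stderr, "saved %zd bytes of VF1 config\n", n);
		close(fd);
		return n > 0 ? 0 : 1;
	}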
/**
* xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs.
* @gt: the &xe_gt to register
@@ -423,5 +545,15 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
pf_add_config_attrs(gt, vfdentry, VFID(n));
debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops);
+
+ /* for testing/debugging purposes only! */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ debugfs_create_file("guc_state",
+ IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
+ vfdentry, NULL, &guc_state_ops);
+ debugfs_create_file("config_blob",
+ IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
+ vfdentry, NULL, &config_blob_ops);
+ }
}
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
new file mode 100644
index 000000000000..c712111aa30d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "xe_bo.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_sriov.h"
+
+/* Return: number of dwords saved/restored/required or a negative error code on failure */
+static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode,
+ u64 addr, u32 ndwords)
+{
+ u32 request[PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_SAVE_RESTORE_VF) |
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE, opcode),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID, vfid),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO, lower_32_bits(addr)),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI, upper_32_bits(addr)),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE, ndwords),
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/* Return: size of the state in dwords or a negative error code on failure */
+static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ ret = guc_action_vf_save_restore(&gt->uc.guc, vfid, GUC_PF_OPCODE_VF_SAVE, 0, 0);
+ return ret ?: -ENODATA;
+}
+
+/* Return: number of state dwords saved or a negative error code on failure */
+static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
+ void *buff, size_t size)
+{
+ const int ndwords = size / sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ xe_gt_assert(gt, size % sizeof(u32) == 0);
+ xe_gt_assert(gt, size == ndwords * sizeof(u32));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
+ xe_bo_ggtt_addr(bo), ndwords);
+ if (!ret)
+ ret = -ENODATA;
+ else if (ret > ndwords)
+ ret = -EPROTO;
+ else if (ret > 0)
+ xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
+
+ xe_bo_unpin_map_no_vm(bo);
+ return ret;
+}
+
+/* Return: number of state dwords restored or a negative error code on failure */
+static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
+ const void *buff, size_t size)
+{
+ const int ndwords = size / sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ xe_gt_assert(gt, size % sizeof(u32) == 0);
+ xe_gt_assert(gt, size == ndwords * sizeof(u32));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
+
+ ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
+ xe_bo_ggtt_addr(bo), ndwords);
+ if (!ret)
+ ret = -ENODATA;
+ else if (ret > ndwords)
+ ret = -EPROTO;
+
+ xe_bo_unpin_map_no_vm(bo);
+ return ret;
+}
+
+static bool pf_migration_supported(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ return gt->sriov.pf.migration.supported;
+}
+
+static struct mutex *pf_migration_mutex(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ return &gt->sriov.pf.migration.snapshot_lock;
+}
+
+static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt,
+ unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+ lockdep_assert_held(pf_migration_mutex(gt));
+
+ return &gt->sriov.pf.vfs[vfid].snapshot;
+}
+
+static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs;
+}
+
+static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ drmm_kfree(&xe->drm, snapshot->guc.buff);
+ snapshot->guc.buff = NULL;
+ snapshot->guc.size = 0;
+}
+
+static int pf_alloc_guc_state(struct xe_gt *gt,
+ struct xe_gt_sriov_state_snapshot *snapshot,
+ size_t size)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ void *p;
+
+ pf_free_guc_state(gt, snapshot);
+
+ if (!size)
+ return -ENODATA;
+
+ if (size % sizeof(u32))
+ return -EINVAL;
+
+ if (size > SZ_2M)
+ return -EFBIG;
+
+ p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ snapshot->guc.buff = p;
+ snapshot->guc.size = size;
+ return 0;
+}
+
+static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot);
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n",
+ vfid, snapshot->guc.size / sizeof(u32));
+ print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET,
+ snapshot->guc.buff, min(SZ_64, snapshot->guc.size));
+ }
+}
+
+static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ size_t size;
+ int ret;
+
+ ret = pf_send_guc_query_vf_state_size(gt, vfid);
+ if (ret < 0)
+ goto fail;
+ size = ret * sizeof(u32);
+ xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size);
+
+ ret = pf_alloc_guc_state(gt, snapshot, size);
+ if (ret < 0)
+ goto fail;
+
+ ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size);
+ if (ret < 0)
+ goto fail;
+ size = ret * sizeof(u32);
+ xe_gt_assert(gt, size);
+ xe_gt_assert(gt, size <= snapshot->guc.size);
+ snapshot->guc.size = size;
+
+ pf_dump_guc_state(gt, snapshot);
+ return 0;
+
+fail:
+ xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret));
+ pf_free_guc_state(gt, snapshot);
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ err = pf_save_vf_guc_state(gt, vfid);
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return err;
+}
+
+static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ int ret;
+
+ if (!snapshot->guc.size)
+ return -ENODATA;
+
+ xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n",
+ snapshot->guc.size / sizeof(u32), vfid);
+ ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size);
+ if (ret < 0)
+ goto fail;
+
+ xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid);
+ return 0;
+
+fail:
+ xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret));
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ ret = pf_restore_vf_guc_state(gt, vfid);
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads up to @count bytes from the saved VF GuC state buffer
+ * at offset @pos into the user space address starting at @buf.
+ *
+ * Return: the number of bytes read or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+ char __user *buf, size_t count, loff_t *pos)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ snapshot = pf_pick_vf_snapshot(gt, vfid);
+ if (snapshot->guc.size)
+ ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff,
+ snapshot->guc.size);
+ else
+ ret = -ENODATA;
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer with GuC VF state
+ * @size: the size of GuC VF state (in bytes)
+ *
+ * This function is for PF only.
+ *
+ * This function reads @size bytes of the VF GuC state stored at user space
+ * address @buf and writes it into an internal VF state buffer.
+ *
+ * Return: the number of bytes used or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
+ const char __user *buf, size_t size)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot;
+ loff_t pos = 0;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ snapshot = pf_pick_vf_snapshot(gt, vfid);
+ ret = pf_alloc_guc_state(gt, snapshot, size);
+ if (!ret) {
+ ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size);
+ if (ret < 0)
+ pf_free_guc_state(gt, snapshot);
+ else
+ pf_dump_guc_state(gt, snapshot);
+ }
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static bool pf_check_migration_support(struct xe_gt *gt)
+{
+ /* GuC 70.25 with save/restore v2 is required */
+ xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0));
+
+ /* XXX: for now this is for feature enabling only */
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_init() - Initialize support for VF migration.
+ * @gt: the &xe_gt
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+
+ gt->sriov.pf.migration.supported = pf_check_migration_support(gt);
+
+ if (!pf_migration_supported(gt))
+ return 0;
+
+ err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock);
+ if (err)
+ return err;
+
+ return 0;
+}
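Tying the new file together, the end-to-end use looks roughly like the following; this is an assumption pieced together from the pause state machine and the debugfs hunks earlier in this series, not a documented sequence:

	/*
	 *  - pause the VF: the new PAUSE_SAVE_GUC step snapshots its GuC state
	 *    via xe_gt_sriov_pf_migration_save_guc_state()
	 *  - read the snapshot and the provisioning blob from the per-VF debugfs
	 *    nodes "guc_state" and "config_blob"
	 *  - on the target PF, write "config_blob" (xe_gt_sriov_pf_config_restore())
	 *    and "guc_state" (xe_gt_sriov_pf_migration_write_guc_state())
	 *  - push the stored snapshot back to the GuC, e.g. with the debugfs
	 *    "restore!" control command, which ends up in
	 *    xe_gt_sriov_pf_migration_restore_guc_state()
	 */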
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
new file mode 100644
index 000000000000..09faeae00ddb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MIGRATION_H_
+#define _XE_GT_SRIOV_PF_MIGRATION_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
+int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
+
+#ifdef CONFIG_DEBUG_FS
+ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+ char __user *buf, size_t count, loff_t *pos);
+ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
+ const char __user *buf, size_t count);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
new file mode 100644
index 000000000000..1f3110b6d44f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
+#define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data.
+ *
+ * Used by the PF driver to maintain per-VF migration data.
+ */
+struct xe_gt_sriov_state_snapshot {
+ /** @guc: GuC VF state snapshot */
+ struct {
+ /** @guc.buff: buffer with the VF state */
+ u32 *buff;
+ /** @guc.size: size of the buffer (must be dword-aligned) */
+ u32 size;
+ } guc;
+};
+
+/**
+ * struct xe_gt_sriov_pf_migration - GT-level VF migration data.
+ *
+ * Used by the PF driver to maintain non-VF specific per-GT data.
+ */
+struct xe_gt_sriov_pf_migration {
+ /** @supported: indicates whether the feature is supported */
+ bool supported;
+
+ /** @snapshot_lock: protects all VFs snapshots */
+ struct mutex snapshot_lock;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
index 0e23b7ea4f3e..924e75b94aec 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -237,7 +237,7 @@ static void read_many(struct xe_gt *gt, unsigned int count,
const struct xe_reg *regs, u32 *values)
{
while (count--)
- *values++ = xe_mmio_read32(gt, *regs++);
+ *values++ = xe_mmio_read32(&gt->mmio, *regs++);
}
static void pf_prepare_runtime_info(struct xe_gt *gt)
@@ -402,7 +402,7 @@ static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit,
for (i = 0; i < count; ++i, ++data) {
addr = runtime->regs[start + i].addr;
- data->offset = xe_mmio_adjusted_addr(gt, addr);
+ data->offset = xe_mmio_adjusted_addr(&gt->mmio, addr);
data->value = runtime->values[start + i];
}
@@ -513,7 +513,7 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p
for (; size--; regs++, values++) {
drm_printf(p, "reg[%#x] = %#x\n",
- xe_mmio_adjusted_addr(gt, regs->addr), *values);
+ xe_mmio_adjusted_addr(&gt->mmio, regs->addr), *values);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
index 28e1b130bf87..0426b1a77069 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -10,6 +10,7 @@
#include "xe_gt_sriov_pf_config_types.h"
#include "xe_gt_sriov_pf_control_types.h"
+#include "xe_gt_sriov_pf_migration_types.h"
#include "xe_gt_sriov_pf_monitor_types.h"
#include "xe_gt_sriov_pf_policy_types.h"
#include "xe_gt_sriov_pf_service_types.h"
@@ -29,6 +30,9 @@ struct xe_gt_sriov_metadata {
/** @version: negotiated VF/PF ABI version */
struct xe_gt_sriov_pf_service_version version;
+
+ /** @snapshot: snapshot of the VF state data */
+ struct xe_gt_sriov_state_snapshot snapshot;
};
/**
@@ -36,6 +40,7 @@ struct xe_gt_sriov_metadata {
* @service: service data.
* @control: control data.
* @policy: policy data.
+ * @migration: migration data.
* @spare: PF-only provisioning configuration.
* @vfs: metadata for all VFs.
*/
@@ -43,6 +48,7 @@ struct xe_gt_sriov_pf {
struct xe_gt_sriov_pf_service service;
struct xe_gt_sriov_pf_control control;
struct xe_gt_sriov_pf_policy policy;
+ struct xe_gt_sriov_pf_migration migration;
struct xe_gt_sriov_spare_config spare;
struct xe_gt_sriov_metadata *vfs;
};
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4ebc82e607af..d3baba50f085 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -881,7 +881,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
*/
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
struct vf_runtime_reg *rr;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
@@ -917,7 +917,7 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
*/
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
xe_gt_assert(gt, !reg.vf);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
index f3ddcbefc6bc..2ed5b6780d30 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
@@ -33,7 +33,7 @@ static const struct drm_info_list vf_info[] = {
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_vf_print_version,
},
-#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) || IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)
{
"runtime_regs",
.show = xe_gt_debugfs_simple_show,
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 25963e33a383..03b225364101 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -41,9 +41,9 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
xe_pm_runtime_get(gt_to_xe(gt));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS);
else
- reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+ reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS);
xe_pm_runtime_put(gt_to_xe(gt));
return reg;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 9d82ea30f4df..3cb228c773cd 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -270,6 +270,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
gt->uc.guc.submission_state.enabled) {
@@ -283,20 +284,22 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
+ struct xe_mmio *mmio = &gt->mmio;
+
if (IS_SRIOV_VF(xe))
return 0;
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
PVC_GUC_TLB_INV_DESC1_INVALIDATE);
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
PVC_GUC_TLB_INV_DESC0_VALID);
} else {
- xe_mmio_write32(gt, GUC_TLB_INV_CR,
+ xe_mmio_write32(mmio, GUC_TLB_INV_CR,
GUC_TLB_INV_CR_INVALIDATE);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 0662f71c6ede..df2042db7ee6 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -5,6 +5,7 @@
#include "xe_gt_topology.h"
+#include <generated/xe_wa_oob.h>
#include <linux/bitmap.h>
#include <linux/compiler.h>
@@ -12,6 +13,7 @@
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"
+#include "xe_wa.h"
static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
@@ -25,7 +27,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
va_start(argp, numregs);
for (i = 0; i < numregs; i++)
- fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
+ fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
va_end(argp);
bitmap_from_arr32(mask, fuse_val, numregs * 32);
@@ -35,7 +37,7 @@ static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
+ u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
u32 val = 0;
int i;
@@ -127,7 +129,19 @@ static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+ u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
+
+ /*
+ * PTL platforms with media version 30.00 do not provide proper values
+ * for the media GT's L3 bank registers. Skip the readout since we
+ * don't have any way to obtain real values.
+ *
+ * This may get re-described as an official workaround in the future,
+ * but there's no tracking number assigned yet so we use a custom
+ * OOB workaround descriptor.
+ */
+ if (XE_WA(gt, no_media_l3))
+ return;
if (GRAPHICS_VER(xe) >= 20) {
xe_l3_bank_mask_t per_node = {};
@@ -141,7 +155,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
xe_l3_bank_mask_t per_node = {};
xe_l3_bank_mask_t per_mask_bit = {};
u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
- u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
+ u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
bitmap_set_value8(per_mask_bit, 0x3, 0);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 3d1c51de0268..a287b98ee70b 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -6,6 +6,7 @@
#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_
+#include "xe_device_types.h"
#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
@@ -145,19 +146,20 @@ struct xe_gt {
/**
* @mmio: mmio info for GT. All GTs within a tile share the same
* register space, but have their own copy of GSI registers at a
- * specific offset, as well as their own forcewake handling.
+ * specific offset.
+ */
+ struct xe_mmio mmio;
+
+ /**
+ * @pm: power management info for GT. The driver uses the GT's
+ * "force wake" interface to wake up specific parts of the GT hardware
+ * from C6 sleep states and ensure the hardware remains awake while it
+ * is being actively used.
*/
struct {
- /** @mmio.fw: force wake for GT */
+ /** @pm.fw: force wake for GT */
struct xe_force_wake fw;
- /**
- * @mmio.adj_limit: adjust MMIO address if address is below this
- * value
- */
- u32 adj_limit;
- /** @mmio.adj_offset: offect to add to MMIO address when adjusting */
- u32 adj_offset;
- } mmio;
+ } pm;
/** @sriov: virtualization data related to GT */
union {
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 52df28032a6f..7f704346a8f4 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -14,6 +14,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_guc_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
@@ -22,6 +23,7 @@
#include "xe_gt_sriov_vf.h"
#include "xe_gt_throttle.h"
#include "xe_guc_ads.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_db_mgr.h"
#include "xe_guc_hwconfig.h"
@@ -68,7 +70,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
static u32 guc_ctl_feature_flags(struct xe_guc *guc)
{
- u32 flags = 0;
+ u32 flags = GUC_CTL_ENABLE_LITE_RESTORE;
if (!guc_to_xe(guc)->info.skip_guc_pc)
flags |= GUC_CTL_ENABLE_SLPC;
@@ -236,20 +238,21 @@ static void guc_write_params(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(0), 0);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
- xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]);
}
static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
struct xe_gt *gt = guc_to_gt(guc);
+ unsigned int fw_ref;
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -338,6 +341,10 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
+ ret = xe_guc_capture_init(guc);
+ if (ret)
+ goto out;
+
ret = xe_guc_ads_init(&guc->ads);
if (ret)
goto out;
@@ -425,6 +432,7 @@ int xe_guc_post_load_init(struct xe_guc *guc)
int xe_guc_reset(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
u32 guc_status, gdrst;
int ret;
@@ -433,15 +441,15 @@ int xe_guc_reset(struct xe_guc *guc)
if (IS_SRIOV_VF(gt_to_xe(gt)))
return xe_gt_sriov_vf_bootstrap(gt);
- xe_mmio_write32(gt, GDRST, GRDOM_GUC);
+ xe_mmio_write32(mmio, GDRST, GRDOM_GUC);
- ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
+ ret = xe_mmio_wait32(mmio, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
if (ret) {
xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst);
goto err_out;
}
- guc_status = xe_mmio_read32(gt, GUC_STATUS);
+ guc_status = xe_mmio_read32(mmio, GUC_STATUS);
if (!(guc_status & GS_MIA_IN_RESET)) {
xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n",
guc_status);
@@ -459,6 +467,7 @@ err_out:
static void guc_prepare_xfer(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = guc_to_xe(guc);
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
@@ -473,12 +482,12 @@ static void guc_prepare_xfer(struct xe_guc *guc)
shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
/* Must program this register before loading the ucode with DMA */
- xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
+ xe_mmio_write32(mmio, GUC_SHIM_CONTROL, shim_flags);
- xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+ xe_mmio_write32(mmio, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
/* Make sure GuC receives ARAT interrupts */
- xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0);
+ xe_mmio_rmw32(mmio, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0);
}
/*
@@ -494,7 +503,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
if (guc->fw.rsa_size > 256) {
u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
xe_uc_fw_rsa_offset(&guc->fw);
- xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
+ xe_mmio_write32(&gt->mmio, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
return 0;
}
@@ -503,7 +512,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
- xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]);
+ xe_mmio_write32(&gt->mmio, UOS_RSA_SCRATCH(i), rsa[i]);
return 0;
}
@@ -583,7 +592,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
* extreme thermal throttling. And a system that is that hot during boot is probably
* dead anyway!
*/
-#if defined(CONFIG_DRM_XE_DEBUG)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define GUC_LOAD_RETRY_LIMIT 20
#else
#define GUC_LOAD_RETRY_LIMIT 3
@@ -593,6 +602,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
static void guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
ktime_t before, after, delta;
int load_done;
@@ -619,7 +629,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
* timeouts rather than allowing a huge timeout each time. So basically, need
* to treat a timeout no different to a value change.
*/
- ret = xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
+ ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
last_status, 1000 * 1000, &status, false);
if (ret < 0)
count++;
@@ -657,7 +667,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
switch (bootrom) {
case XE_BOOTROM_STATUS_NO_KEY_FOUND:
xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
- xe_mmio_read32(gt, GUC_HEADER_INFO));
+ xe_mmio_read32(mmio, GUC_HEADER_INFO));
break;
case XE_BOOTROM_STATUS_RSA_FAILED:
@@ -672,7 +682,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
switch (ukernel) {
case XE_GUC_LOAD_STATUS_EXCEPTION:
xe_gt_err(gt, "firmware exception. EIP: %#x\n",
- xe_mmio_read32(gt, SOFT_SCRATCH(13)));
+ xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
break;
case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
@@ -824,10 +834,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
+ msg = xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(15));
msg &= XE_GUC_RECV_MSG_EXCEPTION |
XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
- xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(15), 0);
if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
xe_gt_err(gt, "Received early GuC crash dump notification!\n");
@@ -844,14 +854,14 @@ static void guc_enable_irq(struct xe_guc *guc)
REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
/* Primary GuC and media GuC share a single enable bit */
- xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
+ xe_mmio_write32(&gt->mmio, GUC_SG_INTR_ENABLE,
REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
/*
* There are separate mask bits for primary and media GuCs, so use
* a RMW operation to avoid clobbering the other GuC's setting.
*/
- xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
+ xe_mmio_rmw32(&gt->mmio, GUC_SG_INTR_MASK, events, 0);
}
int xe_guc_enable_communication(struct xe_guc *guc)
@@ -863,7 +873,7 @@ int xe_guc_enable_communication(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
struct xe_tile *tile = gt_to_tile(gt);
- err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc);
+ err = xe_memirq_init_guc(&tile->memirq, guc);
if (err)
return err;
} else {
@@ -907,7 +917,7 @@ void xe_guc_notify(struct xe_guc *guc)
* additional payload data to the GuC but this capability is not
* used by the firmware yet. Use default value in the meantime.
*/
- xe_mmio_write32(gt, guc->notify_reg, default_notify_data);
+ xe_mmio_write32(&gt->mmio, guc->notify_reg, default_notify_data);
}
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
@@ -925,6 +935,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
u32 header, reply;
struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
@@ -947,19 +958,19 @@ retry:
/* Not in critical data-path, just do if else for GT type */
if (xe_gt_is_media_type(gt)) {
for (i = 0; i < len; ++i)
- xe_mmio_write32(gt, MED_VF_SW_FLAG(i),
+ xe_mmio_write32(mmio, MED_VF_SW_FLAG(i),
request[i]);
- xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX));
+ xe_mmio_read32(mmio, MED_VF_SW_FLAG(LAST_INDEX));
} else {
for (i = 0; i < len; ++i)
- xe_mmio_write32(gt, VF_SW_FLAG(i),
+ xe_mmio_write32(mmio, VF_SW_FLAG(i),
request[i]);
- xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX));
+ xe_mmio_read32(mmio, VF_SW_FLAG(LAST_INDEX));
}
xe_guc_notify(guc);
- ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN,
+ ret = xe_mmio_wait32(mmio, reply_reg, GUC_HXG_MSG_0_ORIGIN,
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
50000, &reply, false);
if (ret) {
@@ -969,7 +980,7 @@ timeout:
return ret;
}
- header = xe_mmio_read32(gt, reply_reg);
+ header = xe_mmio_read32(mmio, reply_reg);
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
/*
@@ -985,7 +996,7 @@ timeout:
BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
- ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask,
+ ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask,
1000000, &header, false);
if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
@@ -1032,7 +1043,7 @@ proto:
for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
reply_reg.addr += sizeof(u32);
- response_buf[i] = xe_mmio_read32(gt, reply_reg);
+ response_buf[i] = xe_mmio_read32(mmio, reply_reg);
}
}
@@ -1145,17 +1156,17 @@ int xe_guc_start(struct xe_guc *guc)
void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_gt *gt = guc_to_gt(guc);
+ unsigned int fw_ref;
u32 status;
- int err;
int i;
xe_uc_fw_print(&guc->fw, p);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
- status = xe_mmio_read32(gt, GUC_STATUS);
+ status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
drm_printf(p, "\nGuC status 0x%08x:\n", status);
drm_printf(p, "\tBootrom status = 0x%x\n",
@@ -1170,12 +1181,15 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
drm_puts(p, "\nScratch registers:\n");
for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
drm_printf(p, "\t%2d: \t0x%x\n",
- i, xe_mmio_read32(gt, SOFT_SCRATCH(i)));
+ i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i)));
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ drm_puts(p, "\n");
xe_guc_ct_print(&guc->ct, p, false);
+
+ drm_puts(p, "\n");
xe_guc_submit_print(guc, p);
}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 42116b167c98..58338be44558 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -82,4 +82,9 @@ static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
return gt_to_xe(guc_to_gt(guc));
}
+static inline struct drm_device *guc_to_drm(struct xe_guc *guc)
+{
+ return &guc_to_xe(guc)->drm;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index d1902a8581ca..4e746ae98888 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -5,6 +5,8 @@
#include "xe_guc_ads.h"
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -18,6 +20,7 @@
#include "xe_gt_ccs_mode.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
@@ -149,8 +152,7 @@ static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
{
- /* FIXME: Allocate a proper capture list */
- return PAGE_ALIGN(PAGE_SIZE);
+ return PAGE_ALIGN(ads->capture_size);
}
static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
@@ -357,6 +359,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
&offset, &remain);
+ if (XE_WA(gt, 14022866841))
+ guc_waklv_enable_simple(ads,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
+ &offset, &remain);
+
/*
* On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
* the default value for this register is determined to be 0xC40. This could change in the
@@ -404,6 +411,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
struct xe_bo *bo;
ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+ ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
ads->regset_size = calculate_regset_size(gt);
ads->ads_waklv_size = calculate_waklv_size(ads);
@@ -418,14 +426,15 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */
/**
* xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
* @ads: Additional data structures object
*
- * Recalcuate golden_lrc_size & regset_size as the number hardware engines may
- * have changed after the hwconfig was loaded. Also verify the new sizes fit in
- * the already allocated ADS buffer object.
+ * Recalculate golden_lrc_size, capture_size and regset_size as the number
+ * hardware engines may have changed after the hwconfig was loaded. Also verify
+ * the new sizes fit in the already allocated ADS buffer object.
*
* Return: 0 on success, negative error code on error.
*/
@@ -437,6 +446,8 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
xe_gt_assert(gt, ads->bo);
ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+ /* Calculate Capture size with worst size */
+ ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
ads->regset_size = calculate_regset_size(gt);
xe_gt_assert(gt, ads->golden_lrc_size +
@@ -536,20 +547,148 @@ static void guc_mapping_table_init(struct xe_gt *gt,
}
}
-static void guc_capture_list_init(struct xe_guc_ads *ads)
+static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
+ enum guc_capture_list_class_type capture_class)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 mask;
+
+ switch (capture_class) {
+ case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
+ mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_VIDEO:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_BLITTER:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
+ break;
+ case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
+ mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
+ break;
+ default:
+ mask = 0;
+ }
+
+ return mask;
+}
+
+static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
+ int owner, int type, int class, u32 *total_size, size_t *size,
+ void **pptr)
+{
+ *size = 0;
+
+ if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
+ if (*total_size + *size > ads->capture_size)
+ xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n",
+ *total_size + *size, ads->capture_size);
+ else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
+ return false;
+ }
+
+ return true;
+}
+
+static int guc_capture_prep_lists(struct xe_guc_ads *ads)
{
+ struct xe_guc *guc = ads_to_guc(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
+ struct iosys_map info_map;
+ size_t size = 0;
+ void *ptr;
int i, j;
- u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
- /* FIXME: Populate a proper capture list */
+ /*
+ * GuC Capture's steered reg-list needs to be allocated and initialized
+ * after the GuC-hwconfig is available which guaranteed from here.
+ */
+ xe_guc_capture_steered_list_init(ads_to_guc(ads));
+
+ capture_offset = guc_ads_capture_offset(ads);
+ ads_ggtt = xe_bo_ggtt_addr(ads->bo);
+ info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ offsetof(struct __guc_ads_blob, system_info));
+
+ /* first, set aside the first page for a capture_list with zero descriptors */
+ total_size = PAGE_SIZE;
+ if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);
+
+ null_ggtt = ads_ggtt + capture_offset;
+ capture_offset += PAGE_SIZE;
+
+ /*
+ * Populate capture list : at this point adps is already allocated and
+ * mapped to worst case size
+ */
for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
- for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
- ads_blob_write(ads, ads.capture_instance[i][j], addr);
- ads_blob_write(ads, ads.capture_class[i][j], addr);
+ bool write_empty_list;
+
+ for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
+ u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);
+ /* null list if we dont have said engine or list */
+ if (!engine_mask) {
+ ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
+ ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
+ continue;
+ }
+
+ /* engine exists: start with engine-class registers */
+ write_empty_list = get_capture_list(ads, guc, gt, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ j, &total_size, &size, &ptr);
+ if (!write_empty_list) {
+ ads_blob_write(ads, ads.capture_class[i][j],
+ ads_ggtt + capture_offset);
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
+ ptr, size);
+ total_size += size;
+ capture_offset += size;
+ } else {
+ ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
+ }
+
+ /* engine exists: next, engine-instance registers */
+ write_empty_list = get_capture_list(ads, guc, gt, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
+ j, &total_size, &size, &ptr);
+ if (!write_empty_list) {
+ ads_blob_write(ads, ads.capture_instance[i][j],
+ ads_ggtt + capture_offset);
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
+ ptr, size);
+ total_size += size;
+ capture_offset += size;
+ } else {
+ ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
+ }
}
- ads_blob_write(ads, ads.capture_global[i], addr);
+ /* global registers is last in our PF/VF loops */
+ write_empty_list = get_capture_list(ads, guc, gt, i,
+ GUC_STATE_CAPTURE_TYPE_GLOBAL,
+ 0, &total_size, &size, &ptr);
+ if (!write_empty_list) {
+ ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
+ size);
+ total_size += size;
+ capture_offset += size;
+ } else {
+ ads_blob_write(ads, ads.capture_global[i], null_ggtt);
+ }
}
+
+ if (ads->capture_size != PAGE_ALIGN(total_size))
+ xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n",
+ ads->capture_size, PAGE_ALIGN(total_size));
+ return PAGE_ALIGN(total_size);
}
static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
@@ -684,7 +823,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads)
if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
u32 distdbreg =
- xe_mmio_read32(gt, DIST_DBS_POPULATED);
+ xe_mmio_read32(&gt->mmio, DIST_DBS_POPULATED);
ads_blob_write(ads,
system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
@@ -738,7 +877,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_mmio_reg_state_init(ads);
guc_prep_golden_lrc_null(ads);
guc_mapping_table_init(gt, &info_map);
- guc_capture_list_init(ads);
+ guc_capture_prep_lists(ads);
guc_doorbell_init(ads);
guc_waklv_init(ads);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
index 2de5decfe0fd..70c132458ac3 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -22,6 +22,8 @@ struct xe_guc_ads {
u32 regset_size;
/** @ads_waklv_size: total waklv size supported by platform */
u32 ads_waklv_size;
+ /** @capture_size: size of register set passed to GuC for capture */
+ u32 capture_size;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
new file mode 100644
index 000000000000..cc72446a5de1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -0,0 +1,1975 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2024 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_capture_abi.h"
+#include "abi/guc_log_abi.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "regs/xe_regs.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_capture.h"
+#include "xe_guc_capture_types.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_log.h"
+#include "xe_guc_submit_types.h"
+#include "xe_guc_submit.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_sched_job.h"
+
+/*
+ * struct __guc_capture_bufstate
+ *
+ * Book-keeping structure used to track read and write pointers
+ * as we extract error capture data from the GuC-log-buffer's
+ * error-capture region as a stream of dwords.
+ */
+struct __guc_capture_bufstate {
+ u32 size;
+ u32 data_offset;
+ u32 rd;
+ u32 wr;
+};
+
+/*
+ * struct __guc_capture_parsed_output - extracted error capture node
+ *
+ * A single unit of extracted error-capture output data grouped together
+ * at an engine-instance level. We keep these nodes in a linked list.
+ * See cachelist and outlist below.
+ */
+struct __guc_capture_parsed_output {
+ /*
+ * A single set of 3 capture lists: a global-list
+ * an engine-class-list and an engine-instance list.
+ * outlist in __guc_capture_parsed_output will keep
+ * a linked list of these nodes that will eventually
+ * be detached from outlist and attached into to
+ * xe_codedump in response to a context reset
+ */
+ struct list_head link;
+ bool is_partial;
+ u32 eng_class;
+ u32 eng_inst;
+ u32 guc_id;
+ u32 lrca;
+ u32 type;
+ bool locked;
+ enum xe_hw_engine_snapshot_source_id source;
+ struct gcap_reg_list_info {
+ u32 vfid;
+ u32 num_regs;
+ struct guc_mmio_reg *regs;
+ } reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
+#define GCAP_PARSED_REGLIST_INDEX_GLOBAL BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
+#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
+};
+
+/*
+ * Define all device tables of GuC error capture register lists
+ * NOTE:
+ * For engine-registers, GuC only needs the register offsets
+ * from the engine-mmio-base
+ *
+ * 64 bit registers need 2 entries for low 32 bit register and high 32 bit
+ * register, for example:
+ * Register data_type flags mask Register name
+ * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
+ * { XXX_REG_HI(0), REG_64BIT_HI_DW,, 0, 0, "XXX_REG"},
+ * 1. data_type: Indicate is hi/low 32 bit for a 64 bit register
+ * A 64 bit register define requires 2 consecutive entries,
+ * with low dword first and hi dword the second.
+ * 2. Register name: null for incompleted define
+ */
+#define COMMON_XELP_BASE_GLOBAL \
+ { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"}
+
+#define COMMON_BASE_ENGINE_INSTANCE \
+ { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \
+ { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \
+ { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \
+ { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \
+ { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \
+ { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \
+ { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \
+ { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \
+ { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \
+ { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \
+ { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \
+ { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \
+ { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \
+ { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \
+ { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \
+ { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \
+ { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \
+ { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \
+ { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \
+ { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
+ { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"}
+
+#define COMMON_XELP_RC_CLASS \
+ { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"}
+
+#define COMMON_XELP_RC_CLASS_INSTDONE \
+ { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \
+ { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA"}, \
+ { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"}
+
+#define XELP_VEC_CLASS_REGS \
+ { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \
+ { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \
+ { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \
+ { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"}
+
+/* XE_LP Global */
+static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
+ COMMON_XELP_BASE_GLOBAL,
+};
+
+/* Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_rc_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* Render / Compute Engine-Class */
+static const struct __guc_mmio_reg_descr xe_rc_class_regs[] = {
+ COMMON_XELP_RC_CLASS,
+ COMMON_XELP_RC_CLASS_INSTDONE,
+};
+
+/* Render / Compute Engine-Class for xehpg */
+static const struct __guc_mmio_reg_descr xe_hpg_rc_class_regs[] = {
+ COMMON_XELP_RC_CLASS,
+};
+
+/* Media Decode/Encode Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_vd_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* Video Enhancement Engine-Class */
+static const struct __guc_mmio_reg_descr xe_vec_class_regs[] = {
+ XELP_VEC_CLASS_REGS,
+};
+
+/* Video Enhancement Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_vec_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* Blitter Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_blt_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* XE_LP - GSC Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/*
+ * Empty list to prevent warnings about unknown class/instance types
+ * as not all class/instance types have entries on all platforms.
+ */
+static const struct __guc_mmio_reg_descr empty_regs_list[] = {
+};
+
+#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
+#define TO_GCAP_DEF_TYPE(x) (GUC_STATE_CAPTURE_TYPE_##x)
+#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
+ { \
+ regslist, \
+ ARRAY_SIZE(regslist), \
+ TO_GCAP_DEF_OWNER(regsowner), \
+ TO_GCAP_DEF_TYPE(regstype), \
+ class \
+ }
+
+/* List of lists for legacy graphic product version < 1255 */
+static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
+ MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ {}
+};
+
+ /* List of lists for graphic product version >= 1255 */
+static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
+ MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(xe_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ {}
+};
+
+static const char * const capture_list_type_names[] = {
+ "Global",
+ "Class",
+ "Instance",
+};
+
+static const char * const capture_engine_class_names[] = {
+ "Render/Compute",
+ "Video",
+ "VideoEnhance",
+ "Blitter",
+ "GSC-Other",
+};
+
+struct __guc_capture_ads_cache {
+ bool is_valid;
+ void *ptr;
+ size_t size;
+ int status;
+};
+
+struct xe_guc_state_capture {
+ const struct __guc_mmio_reg_descr_group *reglists;
+ /**
+ * NOTE: steered registers have multiple instances depending on the HW configuration
+ * (slices or dual-sub-slices) and thus depends on HW fuses discovered
+ */
+ struct __guc_mmio_reg_descr_group *extlists;
+ struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
+ [GUC_STATE_CAPTURE_TYPE_MAX]
+ [GUC_CAPTURE_LIST_CLASS_MAX];
+ void *ads_null_cache;
+ struct list_head cachelist;
+#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
+#define PREALLOC_NODES_DEFAULT_NUMREGS 64
+
+ int max_mmio_per_node;
+ struct list_head outlist;
+};
+
+static void
+guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node);
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_device_reglist(struct xe_device *xe)
+{
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ return xe_hpg_lists;
+ else
+ return xe_lp_lists;
+}
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
+ u32 owner, u32 type, enum guc_capture_list_class_type capture_class)
+{
+ int i;
+
+ if (!reglists)
+ return NULL;
+
+ for (i = 0; reglists[i].list; ++i) {
+ if (reglists[i].owner == owner && reglists[i].type == type &&
+ (reglists[i].engine == capture_class ||
+ reglists[i].type == GUC_STATE_CAPTURE_TYPE_GLOBAL))
+ return &reglists[i];
+ }
+
+ return NULL;
+}
+
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, bool is_ext)
+{
+ const struct __guc_mmio_reg_descr_group *reglists;
+
+ if (is_ext) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ reglists = guc->capture->extlists;
+ } else {
+ reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
+ }
+ return guc_capture_get_one_list(reglists, owner, type, capture_class);
+}
+
+struct __ext_steer_reg {
+ const char *name;
+ struct xe_reg_mcr reg;
+};
+
+static const struct __ext_steer_reg xe_extregs[] = {
+ {"SAMPLER_INSTDONE", SAMPLER_INSTDONE},
+ {"ROW_INSTDONE", ROW_INSTDONE}
+};
+
+static const struct __ext_steer_reg xehpg_extregs[] = {
+ {"SC_INSTDONE", XEHPG_SC_INSTDONE},
+ {"SC_INSTDONE_EXTRA", XEHPG_SC_INSTDONE_EXTRA},
+ {"SC_INSTDONE_EXTRA2", XEHPG_SC_INSTDONE_EXTRA2},
+ {"INSTDONE_GEOM_SVGUNIT", XEHPG_INSTDONE_GEOM_SVGUNIT}
+};
+
+static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
+ const struct __ext_steer_reg *extlist,
+ int slice_id, int subslice_id)
+{
+ if (!ext || !extlist)
+ return;
+
+ ext->reg = XE_REG(extlist->reg.__reg.addr);
+ ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
+ ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
+ ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
+ ext->regname = extlist->name;
+}
+
+static int
+__alloc_ext_regs(struct drm_device *drm, struct __guc_mmio_reg_descr_group *newlist,
+ const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
+{
+ struct __guc_mmio_reg_descr *list;
+
+ list = drmm_kzalloc(drm, num_regs * sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ newlist->list = list;
+ newlist->num_regs = num_regs;
+ newlist->owner = rootlist->owner;
+ newlist->engine = rootlist->engine;
+ newlist->type = rootlist->type;
+
+ return 0;
+}
+
+static int guc_capture_get_steer_reg_num(struct xe_device *xe)
+{
+ int num = ARRAY_SIZE(xe_extregs);
+
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ num += ARRAY_SIZE(xehpg_extregs);
+
+ return num;
+}
+
+static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u16 slice, subslice;
+ int iter, i, total = 0;
+ const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists;
+ const struct __guc_mmio_reg_descr_group *list;
+ struct __guc_mmio_reg_descr_group *extlists;
+ struct __guc_mmio_reg_descr *extarray;
+ bool has_xehpg_extregs = GRAPHICS_VERx100(gt_to_xe(gt)) >= 1255;
+ struct drm_device *drm = &gt_to_xe(gt)->drm;
+ bool has_rcs_ccs = false;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ /*
+ * If GT has no rcs/ccs, no need to alloc steered list.
+ * Currently, only rcs/ccs has steering register, if in the future,
+ * other engine types has steering register, this condition check need
+ * to be extended
+ */
+ for_each_hw_engine(hwe, gt, id) {
+ if (xe_engine_class_to_guc_capture_class(hwe->class) ==
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+ has_rcs_ccs = true;
+ break;
+ }
+ }
+
+ if (!has_rcs_ccs)
+ return;
+
+ /* steered registers currently only exist for the render-class */
+ list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
+ /*
+ * Skip if this platform has no engine class registers or if extlists
+ * was previously allocated
+ */
+ if (!list || guc->capture->extlists)
+ return;
+
+ total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
+ guc_capture_get_steer_reg_num(guc_to_xe(guc));
+
+ if (!total)
+ return;
+
+ /* allocate an extra for an end marker */
+ extlists = drmm_kzalloc(drm, 2 * sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
+ if (!extlists)
+ return;
+
+ if (__alloc_ext_regs(drm, &extlists[0], list, total)) {
+ drmm_kfree(drm, extlists);
+ return;
+ }
+
+ /* For steering registers, the list is generated at run-time */
+ extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
+ for_each_dss_steering(iter, gt, slice, subslice) {
+ for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
+ __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+ ++extarray;
+ }
+
+ if (has_xehpg_extregs)
+ for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
+ __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
+ ++extarray;
+ }
+ }
+
+ extlists[0].num_regs = total;
+
+ xe_gt_dbg(guc_to_gt(guc), "capture found %d ext-regs.\n", total);
+ guc->capture->extlists = extlists;
+}
+
+static int
+guc_capture_list_init(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, struct guc_mmio_reg *ptr,
+ u16 num_entries)
+{
+ u32 ptr_idx = 0, list_idx = 0;
+ const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
+ struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
+ const struct __guc_mmio_reg_descr_group *match;
+ u32 list_num;
+
+ if (!reglists)
+ return -ENODEV;
+
+ match = guc_capture_get_one_list(reglists, owner, type, capture_class);
+ if (!match)
+ return -ENODATA;
+
+ list_num = match->num_regs;
+ for (list_idx = 0; ptr_idx < num_entries && list_idx < list_num; ++list_idx, ++ptr_idx) {
+ ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
+ ptr[ptr_idx].value = 0xDEADF00D;
+ ptr[ptr_idx].flags = match->list[list_idx].flags;
+ ptr[ptr_idx].mask = match->list[list_idx].mask;
+ }
+
+ match = guc_capture_get_one_list(extlists, owner, type, capture_class);
+ if (match)
+ for (ptr_idx = list_num, list_idx = 0;
+ ptr_idx < num_entries && list_idx < match->num_regs;
+ ++ptr_idx, ++list_idx) {
+ ptr[ptr_idx].offset = match->list[list_idx].reg.addr;
+ ptr[ptr_idx].value = 0xDEADF00D;
+ ptr[ptr_idx].flags = match->list[list_idx].flags;
+ ptr[ptr_idx].mask = match->list[list_idx].mask;
+ }
+
+ if (ptr_idx < num_entries)
+ xe_gt_dbg(guc_to_gt(guc), "Got short capture reglist init: %d out-of %d.\n",
+ ptr_idx, num_entries);
+
+ return 0;
+}
+
+static int
+guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class)
+{
+ const struct __guc_mmio_reg_descr_group *match;
+ int num_regs = 0;
+
+ match = guc_capture_get_one_list(guc->capture->reglists, owner, type, capture_class);
+ if (match)
+ num_regs = match->num_regs;
+
+ match = guc_capture_get_one_list(guc->capture->extlists, owner, type, capture_class);
+ if (match)
+ num_regs += match->num_regs;
+ else
+ /*
+ * If a caller wants the full register dump size but we have
+ * not yet got the hw-config, which is before max_mmio_per_node
+ * is initialized, then provide a worst-case number for
+ * extlists based on max dss fuse bits, but only ever for
+ * render/compute
+ */
+ if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
+ type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+ capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
+ !guc->capture->max_mmio_per_node)
+ num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
+ XE_MAX_DSS_FUSE_BITS;
+
+ return num_regs;
+}
+
+static int
+guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class,
+ size_t *size, bool is_purpose_est)
+{
+ struct xe_guc_state_capture *gc = guc->capture;
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct __guc_capture_ads_cache *cache;
+ int num_regs;
+
+ xe_gt_assert(gt, type < GUC_STATE_CAPTURE_TYPE_MAX);
+ xe_gt_assert(gt, capture_class < GUC_CAPTURE_LIST_CLASS_MAX);
+
+ cache = &gc->ads_cache[owner][type][capture_class];
+ if (!gc->reglists) {
+ xe_gt_warn(gt, "No capture reglist for this device\n");
+ return -ENODEV;
+ }
+
+ if (cache->is_valid) {
+ *size = cache->size;
+ return cache->status;
+ }
+
+ if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
+ !guc_capture_get_one_list(gc->reglists, owner, type, capture_class)) {
+ if (type == GUC_STATE_CAPTURE_TYPE_GLOBAL)
+ xe_gt_warn(gt, "Missing capture reglist: global!\n");
+ else
+ xe_gt_warn(gt, "Missing capture reglist: %s(%u):%s(%u)!\n",
+ capture_list_type_names[type], type,
+ capture_engine_class_names[capture_class], capture_class);
+ return -ENODEV;
+ }
+
+ num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
+ /* intentional empty lists can exist depending on hw config */
+ if (!num_regs)
+ return -ENODATA;
+
+ if (size)
+ *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
+ (num_regs * sizeof(struct guc_mmio_reg)));
+
+ return 0;
+}
+
+/**
+ * xe_guc_capture_getlistsize - Get list size for owner/type/class combination
+ * @guc: The GuC object
+ * @owner: PF/VF owner
+ * @type: GuC capture register type
+ * @capture_class: GuC capture engine class id
+ * @size: Point to the size
+ *
+ * This function will get the list for the owner/type/class combination, and
+ * return the page aligned list size.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int
+xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, size_t *size)
+{
+ return guc_capture_getlistsize(guc, owner, type, capture_class, size, false);
+}
+
+/**
+ * xe_guc_capture_getlist - Get register capture list for owner/type/class
+ * combination
+ * @guc: The GuC object
+ * @owner: PF/VF owner
+ * @type: GuC capture register type
+ * @capture_class: GuC capture engine class id
+ * @outptr: Point to cached register capture list
+ *
+ * This function will get the register capture list for the owner/type/class
+ * combination.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int
+xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, void **outptr)
+{
+ struct xe_guc_state_capture *gc = guc->capture;
+ struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][capture_class];
+ struct guc_debug_capture_list *listnode;
+ int ret, num_regs;
+ u8 *caplist, *tmp;
+ size_t size = 0;
+
+ if (!gc->reglists)
+ return -ENODEV;
+
+ if (cache->is_valid) {
+ *outptr = cache->ptr;
+ return cache->status;
+ }
+
+ ret = xe_guc_capture_getlistsize(guc, owner, type, capture_class, &size);
+ if (ret) {
+ cache->is_valid = true;
+ cache->ptr = NULL;
+ cache->size = 0;
+ cache->status = ret;
+ return ret;
+ }
+
+ caplist = drmm_kzalloc(guc_to_drm(guc), size, GFP_KERNEL);
+ if (!caplist)
+ return -ENOMEM;
+
+ /* populate capture list header */
+ tmp = caplist;
+ num_regs = guc_cap_list_num_regs(guc, owner, type, capture_class);
+ listnode = (struct guc_debug_capture_list *)tmp;
+ listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);
+
+ /* populate list of register descriptor */
+ tmp += sizeof(struct guc_debug_capture_list);
+ guc_capture_list_init(guc, owner, type, capture_class,
+ (struct guc_mmio_reg *)tmp, num_regs);
+
+ /* cache this list */
+ cache->is_valid = true;
+ cache->ptr = caplist;
+ cache->size = size;
+ cache->status = 0;
+
+ *outptr = caplist;
+
+ return 0;
+}
+
+/**
+ * xe_guc_capture_getnullheader - Get a null list for register capture
+ * @guc: The GuC object
+ * @outptr: Point to cached register capture list
+ * @size: Point to the size
+ *
+ * This function will alloc for a null list for register capture.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int
+xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size)
+{
+ struct xe_guc_state_capture *gc = guc->capture;
+ int tmp = sizeof(u32) * 4;
+ void *null_header;
+
+ if (gc->ads_null_cache) {
+ *outptr = gc->ads_null_cache;
+ *size = tmp;
+ return 0;
+ }
+
+ null_header = drmm_kzalloc(guc_to_drm(guc), tmp, GFP_KERNEL);
+ if (!null_header)
+ return -ENOMEM;
+
+ gc->ads_null_cache = null_header;
+ *outptr = null_header;
+ *size = tmp;
+
+ return 0;
+}
+
+/**
+ * xe_guc_capture_ads_input_worst_size - Calculate the worst size for GuC register capture
+ * @guc: point to xe_guc structure
+ *
+ * Calculate the worst size for GuC register capture by including all possible engines classes.
+ *
+ * Returns: Calculated size
+ */
+size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
+{
+ size_t total_size, class_size, instance_size, global_size;
+ int i, j;
+
+ /*
+ * This function calculates the worst case register lists size by
+ * including all possible engines classes. It is called during the
+ * first of a two-phase GuC (and ADS-population) initialization
+ * sequence, that is, during the pre-hwconfig phase before we have
+ * the exact engine fusing info.
+ */
+ total_size = PAGE_SIZE; /* Pad a page in front for empty lists */
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+ for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
+ if (xe_guc_capture_getlistsize(guc, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ j, &class_size) < 0)
+ class_size = 0;
+ if (xe_guc_capture_getlistsize(guc, i,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
+ j, &instance_size) < 0)
+ instance_size = 0;
+ total_size += class_size + instance_size;
+ }
+ if (xe_guc_capture_getlistsize(guc, i,
+ GUC_STATE_CAPTURE_TYPE_GLOBAL,
+ 0, &global_size) < 0)
+ global_size = 0;
+ total_size += global_size;
+ }
+
+ return PAGE_ALIGN(total_size);
+}
+
+static int guc_capture_output_size_est(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ int capture_size = 0;
+ size_t tmp = 0;
+
+ if (!guc->capture)
+ return -ENODEV;
+
+ /*
+ * If every single engine-instance suffered a failure in quick succession but
+ * were all unrelated, then a burst of multiple error-capture events would dump
+ * registers for every one engine instance, one at a time. In this case, GuC
+ * would even dump the global-registers repeatedly.
+ *
+ * For each engine instance, there would be 1 x guc_state_capture_group_t output
+ * followed by 3 x guc_state_capture_t lists. The latter is how the register
+ * dumps are split across different register types (where the '3' are global vs class
+ * vs instance).
+ */
+ for_each_hw_engine(hwe, gt, id) {
+ enum guc_capture_list_class_type capture_class;
+
+ capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
+ capture_size += sizeof(struct guc_state_capture_group_header_t) +
+ (3 * sizeof(struct guc_state_capture_header_t));
+
+ if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
+ 0, &tmp, true))
+ capture_size += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ capture_class, &tmp, true))
+ capture_size += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
+ capture_class, &tmp, true))
+ capture_size += tmp;
+ }
+
+ return capture_size;
+}
+
+/*
+ * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
+ * before the Xe can read the data out and process it
+ */
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+
+static void check_guc_capture_size(struct xe_guc *guc)
+{
+ int capture_size = guc_capture_output_size_est(guc);
+ int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
+ u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
+
+ /*
+ * NOTE: capture_size is much smaller than the capture region
+ * allocation (DG2: <80K vs 1MB).
+ * Additionally, its based on space needed to fit all engines getting
+ * reset at once within the same G2H handler task slot. This is very
+ * unlikely. However, if GuC really does run out of space for whatever
+ * reason, we will see an separate warning message when processing the
+ * G2H event capture-notification, search for:
+ * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
+ */
+ if (capture_size < 0)
+ xe_gt_dbg(guc_to_gt(guc),
+ "Failed to calculate error state capture buffer minimum size: %d!\n",
+ capture_size);
+ if (capture_size > buffer_size)
+ xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
+ buffer_size, capture_size);
+ else if (spare_size > buffer_size)
+ xe_gt_dbg(guc_to_gt(guc),
+ "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
+ buffer_size, spare_size, capture_size);
+}
+
+static void
+guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
+ struct list_head *list)
+{
+ list_add(&node->link, list);
+}
+
+static void
+guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_remove_stale_matches_from_list(gc, node);
+ guc_capture_add_node_to_list(node, &gc->outlist);
+}
+
+static void
+guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_add_node_to_list(node, &gc->cachelist);
+}
+
+static void
+guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *n)
+{
+ if (n) {
+ n->locked = 0;
+ list_del(&n->link);
+ /* put node back to cache list */
+ guc_capture_add_node_to_cachelist(gc, n);
+ }
+}
+
+static void
+guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ struct __guc_capture_parsed_output *n, *ntmp;
+ int guc_id = node->guc_id;
+
+ list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
+ if (n != node && !n->locked && n->guc_id == guc_id)
+ guc_capture_free_outlist_node(gc, n);
+ }
+}
+
+static void
+guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
+{
+ struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
+ int i;
+
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ tmp[i] = node->reginfo[i].regs;
+ memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
+ guc->capture->max_mmio_per_node);
+ }
+ memset(node, 0, sizeof(*node));
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
+ node->reginfo[i].regs = tmp[i];
+
+ INIT_LIST_HEAD(&node->link);
+}
+
+/**
+ * DOC: Init, G2H-event and reporting flows for GuC-error-capture
+ *
+ * KMD Init time flows:
+ * --------------------
+ * --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
+ * xe_guc_ads acquires the register lists by calling
+ * xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
+ * where n = 1 for global-reg-list +
+ * num_engine_classes for class-reg-list +
+ * num_engine_classes for instance-reg-list
+ * (since all instances of the same engine-class type
+ * have an identical engine-instance register-list).
+ * ADS module also calls separately for PF vs VF.
+ *
+ * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
+ * Size = #define CAPTURE_BUFFER_SIZE (warns if on too-small)
+ * Note2: 'x 3' to hold multiple capture groups
+ *
+ * GUC Runtime notify capture:
+ * --------------------------
+ * --> G2H STATE_CAPTURE_NOTIFICATION
+ * L--> xe_guc_capture_process
+ * L--> Loop through B (head..tail) and for each engine instance's
+ * err-state-captured register-list we find, we alloc 'C':
+ * --> alloc C: A capture-output-node structure that includes misc capture info along
+ * with 3 register list dumps (global, engine-class and engine-instance)
+ * This node is created from a pre-allocated list of blank nodes in
+ * guc->capture->cachelist and populated with the error-capture
+ * data from GuC and then it's added into guc->capture->outlist linked
+ * list. This list is used for matchup and printout by xe_devcoredump_read
+ * and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
+ *
+ * GUC --> notify context reset:
+ * -----------------------------
+ * --> guc_exec_queue_timedout_job
+ * L--> xe_devcoredump
+ * L--> devcoredump_snapshot
+ * --> xe_hw_engine_snapshot_capture
+ * --> xe_engine_manual_capture(For manual capture)
+ *
+ * User Sysfs / Debugfs
+ * --------------------
+ * --> xe_devcoredump_read->
+ * L--> xxx_snapshot_print
+ * L--> xe_engine_snapshot_print
+ * Print register lists values saved at
+ * guc->capture->outlist
+ *
+ */
+
+static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
+{
+ if (buf->wr >= buf->rd)
+ return (buf->wr - buf->rd);
+ return (buf->size - buf->rd) + buf->wr;
+}
+
+static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
+{
+ if (buf->rd > buf->wr)
+ return (buf->size - buf->rd);
+ return (buf->wr - buf->rd);
+}
+
+/*
+ * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
+ *
+ * The GuC Log buffer region for error-capture is managed like a ring buffer.
+ * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
+ * Additionally, as per the current and foreseeable future, all packed error-
+ * capture output structures are dword aligned.
+ *
+ * That said, if the GuC firmware is in the midst of writing a structure that is larger
+ * than one dword but the tail end of the err-capture buffer-region has lesser space left,
+ * we would need to extract that structure one dword at a time straddled across the end,
+ * onto the start of the ring.
+ *
+ * Below function, guc_capture_log_remove_bytes is a helper for that. All callers of this
+ * function would typically do a straight-up memcpy from the ring contents and will only
+ * call this helper if their structure-extraction is straddling across the end of the
+ * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease
+ * scalability for future expansion of output data types without requiring a redesign
+ * of the flow controls.
+ */
+static int
+guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ void *out, int bytes_needed)
+{
+#define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX 3
+
+ int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
+ int copy_size, avail;
+
+ xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);
+
+ if (bytes_needed > guc_capture_buf_cnt(buf))
+ return -1;
+
+ while (bytes_needed > 0 && tries--) {
+ int misaligned;
+
+ avail = guc_capture_buf_cnt_to_end(buf);
+ misaligned = avail % sizeof(u32);
+ /* wrap if at end */
+ if (!avail) {
+ /* output stream clipped */
+ if (!buf->rd)
+ return fill_size;
+ buf->rd = 0;
+ continue;
+ }
+
+ /* Only copy to u32 aligned data */
+ copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
+ xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
+ buf->data_offset + buf->rd, copy_size);
+ buf->rd += copy_size;
+ fill_size += copy_size;
+ bytes_needed -= copy_size;
+
+ if (misaligned)
+ xe_gt_warn(guc_to_gt(guc),
+ "Bytes extraction not dword aligned, clipping.\n");
+ }
+
+ return fill_size;
+}
+
+static int
+guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_group_header_t *ghdr)
+{
+ int fullsize = sizeof(struct guc_state_capture_group_header_t);
+
+ if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
+ return -1;
+ return 0;
+}
+
+static int
+guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_header_t *hdr)
+{
+ int fullsize = sizeof(struct guc_state_capture_header_t);
+
+ if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
+ return -1;
+ return 0;
+}
+
+static int
+guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_mmio_reg *reg)
+{
+ int fullsize = sizeof(struct guc_mmio_reg);
+
+ if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
+ return -1;
+ return 0;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_get_prealloc_node(struct xe_guc *guc)
+{
+ struct __guc_capture_parsed_output *found = NULL;
+
+ if (!list_empty(&guc->capture->cachelist)) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /* get first avail node from the cache list */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
+ found = n;
+ break;
+ }
+ } else {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /*
+ * traverse reversed and steal back the oldest node already
+ * allocated
+ */
+ list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
+ if (!n->locked)
+ found = n;
+ }
+ }
+ if (found) {
+ list_del(&found->link);
+ guc_capture_init_node(guc, found);
+ }
+
+ return found;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
+ u32 keep_reglist_mask)
+{
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = guc_capture_get_prealloc_node(guc);
+ if (!new)
+ return NULL;
+ if (!original)
+ return new;
+
+ new->is_partial = original->is_partial;
+
+ /* copy reg-lists that we want to clone */
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ if (keep_reglist_mask & BIT(i)) {
+ XE_WARN_ON(original->reginfo[i].num_regs >
+ guc->capture->max_mmio_per_node);
+
+ memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
+ original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
+
+ new->reginfo[i].num_regs = original->reginfo[i].num_regs;
+ new->reginfo[i].vfid = original->reginfo[i].vfid;
+
+ if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
+ new->eng_class = original->eng_class;
+ } else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+ new->eng_inst = original->eng_inst;
+ new->guc_id = original->guc_id;
+ new->lrca = original->lrca;
+ }
+ }
+ }
+
+ return new;
+}
+
+static int
+guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct guc_state_capture_group_header_t ghdr = {0};
+ struct guc_state_capture_header_t hdr = {0};
+ struct __guc_capture_parsed_output *node = NULL;
+ struct guc_mmio_reg *regs = NULL;
+ int i, numlists, numregs, ret = 0;
+ enum guc_state_capture_type datatype;
+ struct guc_mmio_reg tmp;
+ bool is_partial = false;
+
+ i = guc_capture_buf_cnt(buf);
+ if (!i)
+ return -ENODATA;
+
+ if (i % sizeof(u32)) {
+ xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
+ ret = -EIO;
+ goto bailout;
+ }
+
+ /* first get the capture group header */
+ if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
+ ret = -EIO;
+ goto bailout;
+ }
+ /*
+ * we would typically expect a layout as below where n would be expected to be
+ * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine
+ * instances being reset together.
+ * ____________________________________________
+ * | Capture Group |
+ * | ________________________________________ |
+ * | | Capture Group Header: | |
+ * | | - num_captures = 5 | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture1: | |
+ * | | Hdr: GLOBAL, numregs=a | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rega | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture2: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regb | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture3: | |
+ * | | Hdr: INSTANCE=RCS, numregs=c | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regc | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture4: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regd | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture5: | |
+ * | | Hdr: INSTANCE=CCS0, numregs=e | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rege | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * |__________________________________________|
+ */
+ is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
+ numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);
+
+ while (numlists--) {
+ if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
+ ret = -EIO;
+ break;
+ }
+
+ datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
+ if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+ /* unknown capture type - skip over to next capture set */
+ numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
+ hdr.num_mmio_entries);
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &tmp)) {
+ ret = -EIO;
+ break;
+ }
+ }
+ continue;
+ } else if (node) {
+ /*
+ * Based on the current capture type and what we have so far,
+ * decide whether to: add the current node to the internal
+ * linked list for match-up when xe_devcoredump calls later
+ * (and allocate a blank node for the next set of reglists);
+ * continue filling the same node; or clone the current node
+ * but retain only the global or class registers (as in the
+ * case of dependent engine resets).
+ */
+ if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ } else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+ node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
+ /* Add to list, clone node and duplicate global list */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ GCAP_PARSED_REGLIST_INDEX_GLOBAL);
+ } else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
+ node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
+ /* Add to list, clone node and duplicate global + class lists */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
+ GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
+ }
+ }
+
+ if (!node) {
+ node = guc_capture_get_prealloc_node(guc);
+ if (!node) {
+ ret = -ENOMEM;
+ break;
+ }
+ if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
+ xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
+ datatype);
+ }
+ node->is_partial = is_partial;
+ node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
+ node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
+ node->type = datatype;
+
+ switch (datatype) {
+ case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
+ node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
+ hdr.info);
+ node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
+ hdr.info);
+ node->lrca = hdr.lrca;
+ node->guc_id = hdr.guc_id;
+ break;
+ case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
+ node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
+ hdr.info);
+ break;
+ default:
+ break;
+ }
+
+ numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
+ hdr.num_mmio_entries);
+ if (numregs > guc->capture->max_mmio_per_node) {
+ xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
+ numregs = guc->capture->max_mmio_per_node;
+ }
+ node->reginfo[datatype].num_regs = numregs;
+ regs = node->reginfo[datatype].regs;
+ i = 0;
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
+ ret = -EIO;
+ break;
+ }
+ }
+ }
+
+bailout:
+ if (node) {
+ /* If we have data, add to linked list for match-up when xe_devcoredump calls */
+ for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ if (node->reginfo[i].regs) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ break;
+ }
+ }
+ if (node) /* else return it back to cache list */
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+ return ret;
+}
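
/*
 * Worked example for the group layout documented above (illustrative note,
 * not part of the patch): a group of five captures - GLOBAL, CLASS,
 * INSTANCE=RCS, CLASS, INSTANCE=CCS0 - from a dependent engine reset is
 * parsed into two outlist nodes: the first holding the GLOBAL, CLASS and
 * RCS-instance reglists, and a second one cloned from it with a copy of the
 * GLOBAL reglist, then filled with the second CLASS and the CCS0-instance
 * reglists.
 */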
+
+static int __guc_capture_flushlog_complete(struct xe_guc *guc)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+ GUC_LOG_BUFFER_CAPTURE
+ };
+
+ return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static void __guc_capture_process_output(struct xe_guc *guc)
+{
+ unsigned int buffer_size, read_offset, write_offset, full_count;
+ struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
+ struct guc_log_buffer_state log_buf_state_local;
+ struct __guc_capture_bufstate buf;
+ bool new_overflow;
+ int ret, tmp;
+ u32 log_buf_state_offset;
+ u32 src_data_offset;
+
+ log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
+ src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+
+ /*
+ * Make a stack copy of the state structure that lives inside the
+ * GuC log buffer (which is mapped uncached), to avoid reading
+ * from it multiple times.
+ */
+ xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
+ log_buf_state_offset, sizeof(struct guc_log_buffer_state));
+
+ buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+ read_offset = log_buf_state_local.read_ptr;
+ write_offset = log_buf_state_local.sampled_write_ptr;
+ full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);
+
+ /* Bookkeeping stuff */
+ tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
+ guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
+ new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
+ full_count);
+
+ /* Now copy the actual logs. */
+ if (unlikely(new_overflow)) {
+ /* copy the whole buffer in case of overflow */
+ read_offset = 0;
+ write_offset = buffer_size;
+ } else if (unlikely((read_offset > buffer_size) ||
+ (write_offset > buffer_size))) {
+ xe_gt_err(guc_to_gt(guc),
+ "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
+ read_offset, buffer_size);
+ /* copy whole buffer as offsets are unreliable */
+ read_offset = 0;
+ write_offset = buffer_size;
+ }
+
+ buf.size = buffer_size;
+ buf.rd = read_offset;
+ buf.wr = write_offset;
+ buf.data_offset = src_data_offset;
+
+ if (!xe_guc_read_stopped(guc)) {
+ do {
+ ret = guc_capture_extract_reglists(guc, &buf);
+ if (ret && ret != -ENODATA)
+ xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
+ } while (ret >= 0);
+ }
+
+ /* Update the state of log buffer err-cap state */
+ xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+ log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
+ write_offset);
+
+ /*
+ * Clear the flush_to_file from local first, the local was loaded by above
+ * xe_map_memcpy_from, then write out the "updated local" through
+ * xe_map_wr()
+ */
+ log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
+ xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+ log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
+ log_buf_state_local.flags);
+ __guc_capture_flushlog_complete(guc);
+}
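
/*
 * Illustrative sketch (not part of the patch): the rd/wr window set up above
 * describes a circular buffer, and the amount of data still pending can be
 * derived as below. In the real code this role is played by
 * guc_capture_buf_cnt(); the helper name here is hypothetical.
 */
static u32 example_capture_bytes_pending(u32 rd, u32 wr, u32 size)
{
	/* writer ahead of (or equal to) reader: one contiguous region */
	if (wr >= rd)
		return wr - rd;

	/* writer has wrapped: tail of the buffer plus the head up to wr */
	return (size - rd) + wr;
}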
+
+/**
+ * xe_guc_capture_process - Process GuC register captured data
+ * @guc: The GuC object
+ *
+ * When GuC captured data is ready, GuC will send the
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host, and this
+ * function is then called to process the data that comes with the message.
+ *
+ * Returns: None
+ */
+void xe_guc_capture_process(struct xe_guc *guc)
+{
+ if (guc->capture)
+ __guc_capture_process_output(guc);
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_alloc_one_node(struct xe_guc *guc)
+{
+ struct drm_device *drm = guc_to_drm(guc);
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+ new->reginfo[i].regs = drmm_kzalloc(drm, guc->capture->max_mmio_per_node *
+ sizeof(struct guc_mmio_reg), GFP_KERNEL);
+ if (!new->reginfo[i].regs) {
+ while (i)
+ drmm_kfree(drm, new->reginfo[--i].regs);
+ drmm_kfree(drm, new);
+ return NULL;
+ }
+ }
+ guc_capture_init_node(guc, new);
+
+ return new;
+}
+
+static void
+__guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+ struct __guc_capture_parsed_output *node = NULL;
+ int i;
+
+ for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
+ node = guc_capture_alloc_one_node(guc);
+ if (!node) {
+ xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
+ /* don't free the prior allocations, use what we got and clean up at shutdown */
+ return;
+ }
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+}
+
+static int
+guc_get_max_reglist_count(struct xe_guc *guc)
+{
+ int i, j, k, tmp, maxregcount = 0;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+ for (j = 0; j < GUC_STATE_CAPTURE_TYPE_MAX; ++j) {
+ for (k = 0; k < GUC_CAPTURE_LIST_CLASS_MAX; ++k) {
+ const struct __guc_mmio_reg_descr_group *match;
+
+ if (j == GUC_STATE_CAPTURE_TYPE_GLOBAL && k > 0)
+ continue;
+
+ tmp = 0;
+ match = guc_capture_get_one_list(guc->capture->reglists, i, j, k);
+ if (match)
+ tmp = match->num_regs;
+
+ match = guc_capture_get_one_list(guc->capture->extlists, i, j, k);
+ if (match)
+ tmp += match->num_regs;
+
+ if (tmp > maxregcount)
+ maxregcount = tmp;
+ }
+ }
+ }
+ if (!maxregcount)
+ maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
+
+ return maxregcount;
+}
+
+static void
+guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+ /* skip if we've already done the pre-alloc */
+ if (guc->capture->max_mmio_per_node)
+ return;
+
+ guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
+ __guc_capture_create_prealloc_nodes(guc);
+}
+
+static void
+read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
+ struct guc_mmio_reg *regs)
+{
+ int i;
+
+ if (!list || !list->list || list->num_regs == 0)
+ return;
+
+ if (!regs)
+ return;
+
+ for (i = 0; i < list->num_regs; i++) {
+ struct __guc_mmio_reg_descr desc = list->list[i];
+ u32 value;
+
+ if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+ value = xe_hw_engine_mmio_read32(hwe, desc.reg);
+ } else {
+ if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+ FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
+ int group, instance;
+
+ group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
+ instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
+ value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
+ group, instance);
+ } else {
+ value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
+ }
+ }
+
+ regs[i].value = value;
+ regs[i].offset = desc.reg.addr;
+ regs[i].flags = desc.flags;
+ regs[i].mask = desc.mask;
+ }
+}
+
+/**
+ * xe_engine_manual_capture - Take a manual engine snapshot from the engine.
+ * @hwe: Xe HW Engine.
+ * @snapshot: The engine snapshot
+ *
+ * Take an engine snapshot by reading the engine registers directly.
+ *
+ * Returns: None
+ */
+void
+xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ enum guc_capture_list_class_type capture_class;
+ const struct __guc_mmio_reg_descr_group *list;
+ struct __guc_capture_parsed_output *new;
+ enum guc_state_capture_type type;
+ u16 guc_id = 0;
+ u32 lrca = 0;
+
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ new = guc_capture_get_prealloc_node(guc);
+ if (!new)
+ return;
+
+ capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
+ for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
+ struct gcap_reg_list_info *reginfo = &new->reginfo[type];
+ /*
+ * reginfo->regs is allocated based on guc->capture->max_mmio_per_node,
+ * which is derived from the descriptor lists driving the population,
+ * so it should not overflow.
+ */
+
+ /* Get register list for the type/class */
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
+ capture_class, false);
+ if (!list) {
+ xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s",
+ hwe->name);
+ continue;
+ }
+
+ read_reg_to_node(hwe, list, reginfo->regs);
+ reginfo->num_regs = list->num_regs;
+
+ /* Capture steering registers for rcs/ccs */
+ if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
+ type, capture_class, true);
+ if (list) {
+ read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
+ reginfo->num_regs += list->num_regs;
+ }
+ }
+ }
+
+ if (devcoredump && devcoredump->captured) {
+ struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
+
+ if (ge) {
+ guc_id = ge->guc.id;
+ if (ge->lrc[0])
+ lrca = ge->lrc[0]->context_desc;
+ }
+ }
+
+ new->eng_class = xe_engine_class_to_guc_class(hwe->class);
+ new->eng_inst = hwe->instance;
+ new->guc_id = guc_id;
+ new->lrca = lrca;
+ new->is_partial = 0;
+ new->locked = 1;
+ new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
+
+ guc_capture_add_node_to_outlist(guc->capture, new);
+ devcoredump->snapshot.matched_node = new;
+}
+
+static struct guc_mmio_reg *
+guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
+{
+ int i;
+
+ if (reginfo && reginfo->num_regs > 0) {
+ struct guc_mmio_reg *regs = reginfo->regs;
+
+ if (regs)
+ for (i = 0; i < reginfo->num_regs; i++)
+ if (regs[i].offset == addr && regs[i].flags == flags)
+ return &regs[i];
+ }
+
+ return NULL;
+}
+
+static void
+snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
+ u32 type, const struct __guc_mmio_reg_descr_group *list)
+{
+ struct xe_gt *gt = snapshot->hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
+ struct gcap_reg_list_info *reginfo = NULL;
+ u32 last_value, i;
+ bool is_ext;
+
+ if (!list || list->num_regs == 0)
+ return;
+ XE_WARN_ON(!devcore_snapshot->matched_node);
+
+ is_ext = list == guc->capture->extlists;
+ reginfo = &devcore_snapshot->matched_node->reginfo[type];
+
+ /*
+ * Loop through the descriptor list first and look up each register in
+ * the node. This is easier to maintain, as it ensures the printout
+ * matches the ordering of the static descriptor table-of-lists.
+ */
+ for (i = 0; i < list->num_regs; i++) {
+ const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
+ struct guc_mmio_reg *reg;
+ u32 value;
+
+ reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
+ if (!reg)
+ continue;
+
+ value = reg->value;
+ if (reg_desc->data_type == REG_64BIT_LOW_DW) {
+ last_value = value;
+ /* Low 32 bit dword saved, continue for high 32 bit */
+ continue;
+ } else if (reg_desc->data_type == REG_64BIT_HI_DW) {
+ u64 value_qw = ((u64)value << 32) | last_value;
+
+ drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
+ continue;
+ }
+
+ if (is_ext) {
+ int dss, group, instance;
+
+ group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
+ instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
+ dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
+
+ drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
+ } else {
+ drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
+ }
+ }
+}
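
/*
 * Illustrative sketch (not part of the patch): the REG_64BIT_LOW_DW /
 * REG_64BIT_HI_DW handling in the loop above reduces to this reassembly,
 * assuming the descriptor list places the low dword entry immediately
 * before the matching high dword entry.
 */
static u64 example_join_reg64(u32 low_dw, u32 high_dw)
{
	return ((u64)high_dw << 32) | low_dw;
}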
+
+/**
+ * xe_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe HW Engine snapshot object.
+ */
+void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
+{
+ const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
+ "full-capture",
+ "partial-capture"
+ };
+ int type;
+ const struct __guc_mmio_reg_descr_group *list;
+ enum guc_capture_list_class_type capture_class;
+
+ struct xe_gt *gt;
+ struct xe_device *xe;
+ struct xe_devcoredump *devcoredump;
+ struct xe_devcoredump_snapshot *devcore_snapshot;
+
+ if (!snapshot)
+ return;
+
+ gt = snapshot->hwe->gt;
+ xe = gt_to_xe(gt);
+ devcoredump = &xe->devcoredump;
+ devcore_snapshot = &devcoredump->snapshot;
+
+ if (!devcore_snapshot->matched_node)
+ return;
+
+ xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC);
+ xe_gt_assert(gt, snapshot->hwe);
+
+ capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
+
+ drm_printf(p, "%s (physical), logical instance=%d\n",
+ snapshot->name ? snapshot->name : "",
+ snapshot->logical_instance);
+ drm_printf(p, "\tCapture_source: %s\n",
+ snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual");
+ drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
+ drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+ snapshot->forcewake.domain, snapshot->forcewake.ref);
+ drm_printf(p, "\tReserved: %s\n",
+ str_yes_no(snapshot->kernel_reserved));
+
+ for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
+ capture_class, false);
+ snapshot_print_by_list_order(snapshot, p, type, list);
+ }
+
+ if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
+ GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ capture_class, true);
+ snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+ list);
+ }
+
+ drm_puts(p, "\n");
+}
+
+/**
+ * xe_guc_capture_get_matching_and_lock - Find the matching GuC capture for the job.
+ * @job: The job object.
+ *
+ * Search the capture outlist for a node matching the job; this can also be
+ * used to check whether a GuC capture is ready for the job.
+ * If found, the node's locked flag is set.
+ *
+ * Returns: the matching guc-capture node, or NULL if none is found
+ */
+struct __guc_capture_parsed_output *
+xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_exec_queue *q;
+ struct xe_device *xe;
+ u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
+ struct xe_devcoredump_snapshot *ss;
+
+ if (!job)
+ return NULL;
+
+ q = job->q;
+ if (!q || !q->gt)
+ return NULL;
+
+ xe = gt_to_xe(q->gt);
+ if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe))
+ return NULL;
+
+ ss = &xe->devcoredump.snapshot;
+ if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
+ return ss->matched_node;
+
+ /* Find hwe for the job */
+ for_each_hw_engine(hwe, q->gt, id) {
+ if (hwe != q->hwe)
+ continue;
+ guc_class = xe_engine_class_to_guc_class(hwe->class);
+ break;
+ }
+
+ if (guc_class <= GUC_LAST_ENGINE_CLASS) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+ struct xe_guc *guc = &q->gt->uc.guc;
+ u16 guc_id = q->guc->id;
+ u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
+
+ /*
+ * Look for a matching GuC reported error capture node from
+ * the internal output link-list based on engine, guc id and
+ * lrca info.
+ */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+ if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
+ n->guc_id == guc_id && n->lrca == lrca &&
+ n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
+ n->locked = 1;
+ return n;
+ }
+ }
+ }
+ return NULL;
+}
+
+/**
+ * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
+ * @job: The job object
+ *
+ * Take a snapshot of the associated HW engine.
+ *
+ * Returns: None.
+ */
+void
+xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
+{
+ struct xe_exec_queue *q = job->q;
+ struct xe_device *xe = gt_to_xe(q->gt);
+ struct xe_devcoredump *coredump = &xe->devcoredump;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 adj_logical_mask = q->logical_mask;
+
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ for_each_hw_engine(hwe, q->gt, id) {
+ if (hwe->class != q->hwe->class ||
+ !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+ coredump->snapshot.hwe[id] = NULL;
+ continue;
+ }
+
+ if (!coredump->snapshot.hwe[id]) {
+ coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
+ } else {
+ struct __guc_capture_parsed_output *new;
+
+ new = xe_guc_capture_get_matching_and_lock(job);
+ if (new) {
+ struct xe_guc *guc = &q->gt->uc.guc;
+
+ /*
+ * If we are in here, it means we found a fresh
+ * GuC-err-capture node for this engine after
+ * previously failing to find a match in the
+ * early part of guc_exec_queue_timedout_job.
+ * Thus we must free the manually captured node
+ */
+ guc_capture_free_outlist_node(guc->capture,
+ coredump->snapshot.matched_node);
+ coredump->snapshot.matched_node = new;
+ }
+ }
+
+ break;
+ }
+}
+
+/**
+ * xe_guc_capture_put_matched_nodes - Clean up matched nodes
+ * @guc: The GuC object
+ *
+ * Free the matched node and all nodes with the same guc_id from
+ * the GuC capture outlist.
+ */
+void xe_guc_capture_put_matched_nodes(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_devcoredump *devcoredump = &xe->devcoredump;
+ struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node;
+
+ if (n) {
+ guc_capture_remove_stale_matches_from_list(guc->capture, n);
+ guc_capture_free_outlist_node(guc->capture, n);
+ devcoredump->snapshot.matched_node = NULL;
+ }
+}
+
+/**
+ * xe_guc_capture_steered_list_init - Init steering register list
+ * @guc: The GuC object
+ *
+ * Init the steering register list for GuC register capture and create the
+ * pre-allocated capture nodes.
+ */
+void xe_guc_capture_steered_list_init(struct xe_guc *guc)
+{
+ /*
+ * For certain engine classes, there are slice and subslice
+ * level registers requiring steering. We allocate and populate
+ * these based on the hw config and add them as an extension list at
+ * the end of the pre-populated render list.
+ */
+ guc_capture_alloc_steered_lists(guc);
+ check_guc_capture_size(guc);
+ guc_capture_create_prealloc_nodes(guc);
+}
+
+/**
+ * xe_guc_capture_init - Init for GuC register capture
+ * @guc: The GuC object
+ *
+ * Init for GuC register capture: allocate memory for the capture data structure.
+ *
+ * Returns: 0 on success,
+ * -ENOMEM if out of memory
+ */
+int xe_guc_capture_init(struct xe_guc *guc)
+{
+ guc->capture = drmm_kzalloc(guc_to_drm(guc), sizeof(*guc->capture), GFP_KERNEL);
+ if (!guc->capture)
+ return -ENOMEM;
+
+ guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
+
+ INIT_LIST_HEAD(&guc->capture->outlist);
+ INIT_LIST_HEAD(&guc->capture->cachelist);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
new file mode 100644
index 000000000000..97a795d13dd1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CAPTURE_H
+#define _XE_GUC_CAPTURE_H
+
+#include <linux/types.h>
+#include "abi/guc_capture_abi.h"
+#include "xe_guc.h"
+#include "xe_guc_fwif.h"
+
+struct xe_guc;
+struct xe_hw_engine;
+struct xe_hw_engine_snapshot;
+struct xe_sched_job;
+
+static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class)
+{
+ switch (class) {
+ case GUC_RENDER_CLASS:
+ case GUC_COMPUTE_CLASS:
+ return GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE;
+ case GUC_GSC_OTHER_CLASS:
+ return GUC_CAPTURE_LIST_CLASS_GSC_OTHER;
+ case GUC_VIDEO_CLASS:
+ case GUC_VIDEOENHANCE_CLASS:
+ case GUC_BLITTER_CLASS:
+ return class;
+ default:
+ XE_WARN_ON(class);
+ return GUC_CAPTURE_LIST_CLASS_MAX;
+ }
+}
+
+static inline enum guc_capture_list_class_type
+xe_engine_class_to_guc_capture_class(enum xe_engine_class class)
+{
+ return xe_guc_class_to_capture_class(xe_engine_class_to_guc_class(class));
+}
+
+void xe_guc_capture_process(struct xe_guc *guc);
+int xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, void **outptr);
+int xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, size_t *size);
+int xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size);
+size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc);
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
+ enum guc_capture_list_class_type capture_class, bool is_ext);
+struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
+void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot);
+void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
+void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
+void xe_guc_capture_steered_list_init(struct xe_guc *guc);
+void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
+int xe_guc_capture_init(struct xe_guc *guc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
new file mode 100644
index 000000000000..2057125b1bfa
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CAPTURE_TYPES_H
+#define _XE_GUC_CAPTURE_TYPES_H
+
+#include <linux/types.h>
+#include "regs/xe_reg_defs.h"
+
+struct xe_guc;
+
+/* data type of the register in register list */
+enum capture_register_data_type {
+ REG_32BIT = 0,
+ REG_64BIT_LOW_DW,
+ REG_64BIT_HI_DW,
+};
+
+/**
+ * struct __guc_mmio_reg_descr - GuC mmio register descriptor
+ *
+ * The xe_guc_capture module uses these structures to define a register
+ * (offset, name, flags, ...) that is used at ADS registration
+ * time as well as during runtime processing and reporting of error-
+ * capture states generated by GuC just prior to engine reset events.
+ */
+struct __guc_mmio_reg_descr {
+ /** @reg: the register */
+ struct xe_reg reg;
+ /**
+ * @data_type: data type of the register
+ * Can be a 32 bit value, or the low or high dword of a 64 bit value,
+ * see enum capture_register_data_type
+ */
+ enum capture_register_data_type data_type;
+ /** @flags: Flags for the register */
+ u32 flags;
+ /** @mask: The mask to apply */
+ u32 mask;
+ /** @regname: Name of the register */
+ const char *regname;
+};
+
+/**
+ * struct __guc_mmio_reg_descr_group - A group of register descriptors
+ *
+ * The xe_guc_capture module uses these structures to maintain static
+ * tables (per unique platform) consisting of lists of registers
+ * (offsets, names, flags, ...) that are used at ADS registration
+ * time as well as during runtime processing and reporting of error-
+ * capture states generated by GuC just prior to engine reset events.
+ */
+struct __guc_mmio_reg_descr_group {
+ /** @list: The register list */
+ const struct __guc_mmio_reg_descr *list;
+ /** @num_regs: Count of registers in the list */
+ u32 num_regs;
+ /** @owner: PF/VF owner, see enum guc_capture_list_index_type */
+ u32 owner;
+ /** @type: Capture register type, see enum guc_state_capture_type */
+ u32 type;
+ /** @engine: The engine class, see enum guc_capture_list_class_type */
+ u32 engine;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 9c505d3517cd..8aeb1789805c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -8,6 +8,7 @@
#include <linux/bitfield.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <kunit/static_stub.h>
@@ -17,6 +18,7 @@
#include "abi/guc_actions_sriov_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_bo.h"
+#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
@@ -25,12 +27,48 @@
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
+#include "xe_guc_log.h"
#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
#include "xe_map.h"
#include "xe_pm.h"
#include "xe_trace_guc.h"
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
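
/*
 * Illustrative note (not part of the patch): the CT_DEAD_* values are bit
 * positions, so ct->dead.reason can accumulate several causes in one mask.
 * For example, a capture already in flight plus a failed H2G write:
 *
 *	(1 << CT_DEAD_STATE_CAPTURE) | (1 << CT_DEAD_H2G_WRITE)
 *	    == 0x0002 | 0x0008 == 0x000A
 *
 * which is how ct_dead_capture() builds, and ct_dead_print() reports, the
 * reason mask on CONFIG_DRM_XE_DEBUG builds.
 */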
+
/* Used when a CT send wants to block and / or receive data */
struct g2h_fence {
u32 *response_buffer;
@@ -175,14 +213,18 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
- ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM);
if (!ct->g2h_wq)
return -ENOMEM;
spin_lock_init(&ct->fast_lock);
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
- INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
@@ -209,6 +251,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
ct->state = XE_GUC_CT_STATE_DISABLED;
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
#define desc_read(xe_, guc_ctb__, field_) \
xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
@@ -395,6 +438,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -419,10 +463,22 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
if (ct_needs_safe_mode(ct))
ct_enter_safe_mode(ct);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
return 0;
err_out:
xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
return err;
}
@@ -466,6 +522,19 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
if (cmd_len > h2g->info.space) {
h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+ xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u >= %u)\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
h2g->info.size) -
h2g->info.resv_space;
@@ -521,10 +590,24 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
{
+ bool bad = false;
+
lockdep_assert_held(&ct->fast_lock);
- xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
- ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
- xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
ct->ctbs.g2h.info.space += g2h_len;
if (!--ct->g2h_outstanding)
@@ -551,12 +634,43 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 full_len;
struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
tail * sizeof(u32));
+ u32 desc_status;
full_len = len + GUC_CTB_HDR_LEN;
lockdep_assert_held(&ct->lock);
xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
- xe_gt_assert(gt, tail <= h2g->info.size);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: invalid head offset %u >= %u)\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
/* Command will wrap, zero fill (NOPs), return and check credits again */
if (tail + full_len > h2g->info.size) {
@@ -609,6 +723,10 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
desc_read(xe, h2g, head), h2g->info.tail);
return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
}
/*
@@ -716,7 +834,6 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
- struct drm_printer p = xe_gt_info_printer(gt);
unsigned int sleep_period_ms = 1;
int ret;
@@ -769,8 +886,13 @@ try_again:
goto broken;
#undef g2h_avail
- if (dequeue_one_g2h(ct) < 0)
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+ xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
+ ERR_PTR(ret));
goto broken;
+ }
goto try_again;
}
@@ -779,8 +901,7 @@ try_again:
broken:
xe_gt_err(gt, "No forward process on H2G, reset required\n");
- xe_guc_ct_print(ct, &p, true);
- ct->ctbs.h2g.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
return -EDEADLK;
}
@@ -848,7 +969,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
#define ct_alive(ct) \
(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
!ct->ctbs.g2h.info.broken)
- if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
return false;
#undef ct_alive
@@ -890,7 +1011,7 @@ retry_same_fence:
goto retry_same_fence;
if (!g2h_fence_needs_alloc(&g2h_fence))
- xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
return ret;
}
@@ -916,7 +1037,7 @@ retry_same_fence:
if (!ret) {
xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
- xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
mutex_unlock(&ct->lock);
return -ETIME;
}
@@ -1028,6 +1149,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
else
xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
return -EPROTO;
}
@@ -1035,6 +1157,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
g2h_fence = xa_erase(&ct->fence_lookup, fence);
if (unlikely(!g2h_fence)) {
/* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
return 0;
@@ -1079,7 +1202,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
origin);
- ct->ctbs.g2h.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
return -EPROTO;
}
@@ -1097,7 +1220,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
default:
xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
type);
- ct->ctbs.g2h.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
ret = -EOPNOTSUPP;
}
@@ -1140,6 +1263,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
/* Selftest only at the moment */
break;
case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+ break;
case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
/* FIXME: Handle this */
break;
@@ -1174,9 +1299,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
- if (ret)
+ if (ret) {
xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
return 0;
}
@@ -1186,7 +1313,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
struct guc_ctb *g2h = &ct->ctbs.g2h;
- u32 tail, head, len;
+ u32 tail, head, len, desc_status;
s32 avail;
u32 action;
u32 *hxg;
@@ -1205,6 +1332,63 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+ /*
+ u32 desc_head = desc_read(xe, g2h, head);
+
+ * info.head and desc_head are updated back-to-back at the end of
+ * this function and nowhere else. Hence, they cannot be different
+ * unless two g2h_read calls are running concurrently. Which is not
+ * possible because it is guarded by ct->fast_lock. And yet, some
+ * discrete platforms are regularly hitting this error :(.
+ *
+ * desc_head rolling backwards shouldn't cause any noticeable
+ * problems - just a delay in GuC being allowed to proceed past that
+ * point in the queue. So for now, just disable the error until it
+ * can be root caused.
+ *
+ if (g2h->info.head != desc_head) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+ desc_head, g2h->info.head);
+ goto corrupted;
+ }
+ */
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: invalid tail offset %u >= %u)\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
/* Calculate DW available to read */
tail = desc_read(xe, g2h, tail);
avail = tail - g2h->info.head;
@@ -1221,9 +1405,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
if (len > avail) {
xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
avail, len);
- g2h->info.broken = true;
-
- return -EPROTO;
+ goto corrupted;
}
head = (g2h->info.head + 1) % g2h->info.size;
@@ -1269,6 +1451,10 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
action, len, g2h->info.head, tail);
return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
}
static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
@@ -1295,9 +1481,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
xe_gt_warn(gt, "NOT_POSSIBLE");
}
- if (ret)
+ if (ret) {
xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
}
/**
@@ -1357,7 +1545,6 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct)
static void receive_g2h(struct xe_guc_ct *ct)
{
- struct xe_gt *gt = ct_to_gt(ct);
bool ongoing;
int ret;
@@ -1394,9 +1581,8 @@ static void receive_g2h(struct xe_guc_ct *ct)
mutex_unlock(&ct->lock);
if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
- struct drm_printer p = xe_gt_info_printer(gt);
-
- xe_guc_ct_print(ct, &p, false);
+ xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
kick_reset(ct);
}
} while (ret == 1);
@@ -1412,49 +1598,34 @@ static void g2h_worker_func(struct work_struct *w)
receive_g2h(ct);
}
-static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
- struct guc_ctb_snapshot *snapshot,
- bool atomic)
+static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic,
+ bool want_ctb)
{
- u32 head, tail;
-
- xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
- sizeof(struct guc_ct_buffer_desc));
- memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+ struct xe_guc_ct_snapshot *snapshot;
- snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
- atomic ? GFP_ATOMIC : GFP_KERNEL);
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
- if (!snapshot->cmds) {
- drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
- return;
+ if (ct->bo && want_ctb) {
+ snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
}
- head = snapshot->desc.head;
- tail = snapshot->desc.tail;
-
- if (head != tail) {
- struct iosys_map map =
- IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
-
- while (head != tail) {
- snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
- ++head;
- if (head == ctb->info.size) {
- head = 0;
- map = ctb->cmds;
- } else {
- iosys_map_incr(&map, sizeof(u32));
- }
- }
- }
+ return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+ struct guc_ctb_snapshot *snapshot)
+{
+ xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+ sizeof(struct guc_ct_buffer_desc));
+ memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
}
static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
struct drm_printer *p)
{
- u32 head, tail;
-
drm_printf(p, "\tsize: %d\n", snapshot->info.size);
drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
drm_printf(p, "\thead: %d\n", snapshot->info.head);
@@ -1464,63 +1635,46 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+}
- if (!snapshot->cmds)
- return;
+static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic,
+ bool want_ctb)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc_ct_snapshot *snapshot;
- head = snapshot->desc.head;
- tail = snapshot->desc.tail;
+ snapshot = guc_ct_snapshot_alloc(ct, atomic, want_ctb);
+ if (!snapshot) {
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
+ return NULL;
+ }
- while (head != tail) {
- drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
- snapshot->cmds[head]);
- ++head;
- if (head == snapshot->info.size)
- head = 0;
+ if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
+ snapshot->ct_enabled = true;
+ snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+ guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
}
-}
-static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
-{
- kfree(snapshot->cmds);
+ if (ct->bo && snapshot->ctb)
+ xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
+ return snapshot;
}
/**
* xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
* @ct: GuC CT object.
- * @atomic: Boolean to indicate if this is called from atomic context like
- * reset or CTB handler or from some regular path like debugfs.
*
* This can be printed out in a later stage like during dev_coredump
- * analysis.
+ * analysis. This is safe to call from atomic context.
*
* Returns: a GuC CT snapshot object that must be freed by the caller
* by using `xe_guc_ct_snapshot_free`.
*/
-struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
- bool atomic)
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
{
- struct xe_device *xe = ct_to_xe(ct);
- struct xe_guc_ct_snapshot *snapshot;
-
- snapshot = kzalloc(sizeof(*snapshot),
- atomic ? GFP_ATOMIC : GFP_KERNEL);
-
- if (!snapshot) {
- drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
- return NULL;
- }
-
- if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
- snapshot->ct_enabled = true;
- snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
- guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
- &snapshot->h2g, atomic);
- guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
- &snapshot->g2h, atomic);
- }
-
- return snapshot;
+ return guc_ct_snapshot_capture(ct, true, true);
}
/**
@@ -1540,11 +1694,13 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
drm_puts(p, "H2G CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->h2g, p);
- drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+ drm_puts(p, "G2H CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->g2h, p);
-
drm_printf(p, "\tg2h outstanding: %d\n",
snapshot->g2h_outstanding);
+
+ if (snapshot->ctb)
+ xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
} else {
drm_puts(p, "CT disabled\n");
}
@@ -1562,8 +1718,7 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
if (!snapshot)
return;
- guc_ctb_snapshot_free(&snapshot->h2g);
- guc_ctb_snapshot_free(&snapshot->g2h);
+ kfree(snapshot->ctb);
kfree(snapshot);
}
@@ -1571,16 +1726,121 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
* xe_guc_ct_print - GuC CT Print.
* @ct: GuC CT.
* @p: drm_printer where it will be printed out.
- * @atomic: Boolean to indicate if this is called from atomic context like
- * reset or CTB handler or from some regular path like debugfs.
+ * @want_ctb: Should the full CTB content be dumped (vs just the headers)
*
- * This function quickly capture a snapshot and immediately print it out.
+ * This function will quickly capture a snapshot of the CT state
+ * and immediately print it out.
*/
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
{
struct xe_guc_ct_snapshot *snapshot;
- snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+ snapshot = guc_ct_snapshot_capture(ct, false, want_ctb);
xe_guc_ct_snapshot_print(snapshot, p);
xe_guc_ct_snapshot_free(snapshot);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+ snapshot_ct = xe_guc_ct_snapshot_capture(ct);
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ queue_work(system_unbound_wq, &ct->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 190202fce2d0..82c4ae458dda 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -9,6 +9,7 @@
#include "xe_guc_ct_types.h"
struct drm_printer;
+struct xe_device;
int xe_guc_ct_init(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
@@ -16,12 +17,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
-struct xe_guc_ct_snapshot *
-xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
-void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
- struct drm_printer *p);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 761cb9031298..8e1b9d981d61 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -52,8 +52,6 @@ struct guc_ctb {
struct guc_ctb_snapshot {
/** @desc: snapshot of the CTB descriptor */
struct guc_ct_buffer_desc desc;
- /** @cmds: snapshot of the CTB commands */
- u32 *cmds;
/** @info: snapshot of the CTB info */
struct guc_ctb_info info;
};
@@ -70,6 +68,10 @@ struct xe_guc_ct_snapshot {
struct guc_ctb_snapshot g2h;
/** @h2g: H2G CTB snapshot */
struct guc_ctb_snapshot h2g;
+ /** @ctb_size: size of the snapshot of the CTB */
+ size_t ctb_size;
+ /** @ctb: snapshot of the entire CTB */
+ u32 *ctb;
};
/**
@@ -86,6 +88,24 @@ enum xe_guc_ct_state {
XE_GUC_CT_STATE_ENABLED,
};
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+/** struct xe_dead_ct - Information for debugging a dead CT */
+struct xe_dead_ct {
+ /** @lock: protects memory allocation/free operations, and @reason updates */
+ spinlock_t lock;
+ /** @reason: bit mask of CT_DEAD_* reason codes */
+ unsigned int reason;
+ /** @reported: for preventing multiple dumps per error sequence */
+ bool reported;
+ /** @worker: worker thread to get out of interrupt context before dumping */
+ struct work_struct worker;
+ /** @snapshot_ct: copy of CT state and CTB content at point of error */
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ /** @snapshot_log: copy of GuC log at point of error */
+ struct xe_guc_log_snapshot *snapshot_log;
+};
+#endif
+
/**
* struct xe_guc_ct - GuC command transport (CT) layer
*
@@ -128,6 +148,11 @@ struct xe_guc_ct {
u32 msg[GUC_CTB_MSG_MAX_LEN];
/** @fast_msg: Message buffer */
u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /** @dead: information for debugging dead CTs */
+ struct xe_dead_ct dead;
+#endif
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index d3822cbea273..995b306aced7 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -47,9 +47,23 @@ static int guc_log(struct seq_file *m, void *data)
return 0;
}
+static int guc_ctb(struct seq_file *m, void *data)
+{
+ struct xe_guc *guc = node_to_guc(m->private);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_pm_runtime_get(xe);
+ xe_guc_ct_print(&guc->ct, &p, true);
+ xe_pm_runtime_put(xe);
+
+ return 0;
+}
+
static const struct drm_info_list debugfs_list[] = {
{"guc_info", guc_info, 0},
{"guc_log", guc_log, 0},
+ {"guc_ctb", guc_ctb, 0},
};
void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 19ee71aeaf17..08ffe59f22fa 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -8,7 +8,9 @@
#include <linux/bits.h>
+#include "abi/guc_capture_abi.h"
#include "abi/guc_klvs_abi.h"
+#include "xe_hw_engine_types.h"
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
@@ -103,6 +105,7 @@ struct guc_update_exec_queue_policy {
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
+#define GUC_CTL_ENABLE_LITE_RESTORE BIT(4)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
@@ -157,24 +160,6 @@ struct guc_policies {
u32 reserved[4];
} __packed;
-/* GuC MMIO reg state struct */
-struct guc_mmio_reg {
- u32 offset;
- u32 value;
- u32 flags;
- u32 mask;
-#define GUC_REGSET_MASKED BIT(0)
-#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
-#define GUC_REGSET_RESTORE_ONLY BIT(3)
-} __packed;
-
-/* GuC register sets */
-struct guc_mmio_reg_set {
- u32 address;
- u16 count;
- u16 reserved;
-} __packed;
-
/* Generic GT SysInfo data types */
#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0
#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1
@@ -188,12 +173,6 @@ struct guc_gt_system_info {
u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
} __packed;
-enum {
- GUC_CAPTURE_LIST_INDEX_PF = 0,
- GUC_CAPTURE_LIST_INDEX_VF = 1,
- GUC_CAPTURE_LIST_INDEX_MAX = 2,
-};
-
/* GuC Additional Data Struct */
struct guc_ads {
struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h
index da0fedbbdbaf..da10cf0389cb 100644
--- a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h
+++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h
@@ -18,6 +18,13 @@
MAKE_GUC_KLV_KEY(CONCATENATE(VF_CFG_THRESHOLD_, TAG))
/**
+ * MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN - Prepare the name of the KLV length constant.
+ * @TAG: unique tag of the GuC threshold KLV key.
+ */
+#define MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) \
+ MAKE_GUC_KLV_LEN(CONCATENATE(VF_CFG_THRESHOLD_, TAG))
+
+/**
* xe_guc_klv_threshold_key_to_index - Find index of the tracked GuC threshold.
* @key: GuC threshold KLV key.
*
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index a37ee3419428..df4cfb698cdb 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -5,13 +5,26 @@
#include "xe_guc_log.h"
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
+#include "xe_devcoredump.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_map.h"
+#include "xe_mmio.h"
#include "xe_module.h"
+static struct xe_guc *
+log_to_guc(struct xe_guc_log *log)
+{
+ return container_of(log, struct xe_guc, log);
+}
+
static struct xe_gt *
log_to_gt(struct xe_guc_log *log)
{
@@ -49,32 +62,194 @@ static size_t guc_log_size(void)
CAPTURE_BUFFER_SIZE;
}
-void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+#define GUC_LOG_CHUNK_SIZE SZ_2M
+
+static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic)
+{
+ struct xe_guc_log_snapshot *snapshot;
+ size_t remain;
+ int i;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ /*
+ * NB: kmalloc has a hard limit well below the maximum GuC log buffer size.
+ * Also, vmalloc can't be used because this might be called from an atomic
+ * context. So the buffer is broken up into smaller chunks that can be allocated.
+ */
+ snapshot->size = log->bo->size;
+ snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE);
+
+ snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy),
+ atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot->copy)
+ goto fail_snap;
+
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ snapshot->copy[i] = kmalloc(size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot->copy[i])
+ goto fail_copy;
+ remain -= size;
+ }
+
+ return snapshot;
+
+fail_copy:
+ for (i = 0; i < snapshot->num_chunks; i++)
+ kfree(snapshot->copy[i]);
+ kfree(snapshot->copy);
+fail_snap:
+ kfree(snapshot);
+ return NULL;
+}
+
+/**
+ * xe_guc_log_snapshot_free - free a previously captured GuC log snapshot
+ * @snapshot: GuC log snapshot structure
+ *
+ * Safe to call with a NULL @snapshot. Frees the chunked host copies of the
+ * log and then the snapshot structure itself.
+ */
+void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot)
+{
+ int i;
+
+ if (!snapshot)
+ return;
+
+ if (snapshot->copy) {
+ for (i = 0; i < snapshot->num_chunks; i++)
+ kfree(snapshot->copy[i]);
+ kfree(snapshot->copy);
+ }
+
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_log_snapshot_capture - create a new snapshot copy of the GuC log for later dumping
+ * @log: GuC log structure
+ * @atomic: is the call being made from an atomic context?
+ *
+ * Return: pointer to a newly allocated snapshot object or NULL if out of memory. Caller is
+ * responsible for calling xe_guc_log_snapshot_free when done with the snapshot.
+ */
+struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic)
{
+ struct xe_guc_log_snapshot *snapshot;
struct xe_device *xe = log_to_xe(log);
- size_t size;
- int i, j;
+ struct xe_guc *guc = log_to_guc(log);
+ struct xe_gt *gt = log_to_gt(log);
+ unsigned int fw_ref;
+ size_t remain;
+ int i;
- xe_assert(xe, log->bo);
+ if (!log->bo) {
+ xe_gt_err(gt, "GuC log buffer not allocated\n");
+ return NULL;
+ }
- size = log->bo->size;
+ snapshot = xe_guc_log_snapshot_alloc(log, atomic);
+ if (!snapshot) {
+ xe_gt_err(gt, "GuC log snapshot not allocated\n");
+ return NULL;
+ }
-#define DW_PER_READ 128
- xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32))));
- for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
- u32 read[DW_PER_READ];
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
- xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
- DW_PER_READ * sizeof(u32));
-#define DW_PER_PRINT 4
- for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
- u32 *print = read + j * DW_PER_PRINT;
+ xe_map_memcpy_from(xe, snapshot->copy[i], &log->bo->vmap,
+ i * GUC_LOG_CHUNK_SIZE, size);
+ remain -= size;
+ }
- drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
- *(print + 0), *(print + 1),
- *(print + 2), *(print + 3));
- }
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref) {
+ snapshot->stamp = ~0ULL;
+ } else {
+ snapshot->stamp = xe_mmio_read64_2x32(&gt->mmio, GUC_PMTIMESTAMP_LO);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
+ snapshot->ktime = ktime_get_boottime_ns();
+ snapshot->level = log->level;
+ snapshot->ver_found = guc->fw.versions.found[XE_UC_FW_VER_RELEASE];
+ snapshot->ver_want = guc->fw.versions.wanted;
+ snapshot->path = guc->fw.path;
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_log_snapshot_print - dump a previously captured GuC log snapshot to the given printer
+ * @snapshot: a snapshot of the GuC log
+ * @p: the printer object to output to
+ */
+void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p)
+{
+ size_t remain;
+ int i;
+
+ if (!snapshot) {
+ drm_printf(p, "GuC log snapshot not allocated!\n");
+ return;
+ }
+
+ drm_printf(p, "GuC firmware: %s\n", snapshot->path);
+ drm_printf(p, "GuC version: %u.%u.%u (wanted %u.%u.%u)\n",
+ snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch,
+ snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch);
+ drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime);
+ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp);
+ drm_printf(p, "Log level: %u\n", snapshot->level);
+
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
+ remain -= size;
+ }
+}
+
+/**
+ * xe_guc_log_print_dmesg - dump a copy of the GuC log to dmesg
+ * @log: GuC log structure
+ */
+void xe_guc_log_print_dmesg(struct xe_guc_log *log)
+{
+ struct xe_gt *gt = log_to_gt(log);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ drm_printf(&lp, "Dumping GuC log for %ps...\n", __builtin_return_address(0));
+
+ xe_guc_log_print(log, &lp);
+
+ drm_printf(&lp, "Done.\n");
+}
+
+/**
+ * xe_guc_log_print - capture and dump the current GuC log to the given printer
+ * @log: GuC log structure
+ * @p: the printer object to output to
+ */
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+ struct xe_guc_log_snapshot *snapshot;
+
+ drm_printf(p, "**** GuC Log ****\n");
+
+ snapshot = xe_guc_log_snapshot_capture(log, false);
+ drm_printf(p, "CS reference clock: %u\n", log_to_gt(log)->info.reference_clock);
+ xe_guc_log_snapshot_print(snapshot, p);
+ xe_guc_log_snapshot_free(snapshot);
}
int xe_guc_log_init(struct xe_guc_log *log)
@@ -96,3 +271,105 @@ int xe_guc_log_init(struct xe_guc_log *log)
return 0;
}
+
+ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */
+
+static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
+{
+ return CRASH_BUFFER_SIZE;
+}
+
+static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
+{
+ return DEBUG_BUFFER_SIZE;
+}
+
+/**
+ * xe_guc_log_section_size_capture - Get capture buffer size within log sections.
+ * @log: The log object.
+ *
+ * This function returns the size of the capture section within the GuC log buffer.
+ *
+ * Return: capture buffer size in bytes.
+ */
+u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
+{
+ return CAPTURE_BUFFER_SIZE;
+}
+
+/**
+ * xe_guc_get_log_buffer_size - Get log buffer size for a type.
+ * @log: The log object.
+ * @type: The log buffer type
+ *
+ * Return: buffer size.
+ */
+u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
+{
+ switch (type) {
+ case GUC_LOG_BUFFER_CRASH_DUMP:
+ return xe_guc_log_section_size_crash(log);
+ case GUC_LOG_BUFFER_DEBUG:
+ return xe_guc_log_section_size_debug(log);
+ case GUC_LOG_BUFFER_CAPTURE:
+ return xe_guc_log_section_size_capture(log);
+ }
+ return 0;
+}
+
+/**
+ * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
+ * @log: The log object.
+ * @type: The log buffer type
+ *
+ * This function returns the offset of the section for the given @type within the
+ * log buffer, accounting for the state header page and all preceding sections.
+ *
+ * Return: buffer offset in bytes.
+ */
+u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
+{
+ enum guc_log_buffer_type i;
+ u32 offset = PAGE_SIZE; /* for the log_buffer_states */
+
+ for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
+ if (i == type)
+ break;
+ offset += xe_guc_get_log_buffer_size(log, i);
+ }
+
+ return offset;
+}
+
+/**
+ * xe_guc_check_log_buf_overflow - Check if log buffer overflowed
+ * @log: The log object.
+ * @type: The log buffer type
+ * @full_cnt: The count of buffer full
+ *
+ * This function compares the buffer-full count against the previously sampled
+ * value; a mismatch indicates that the buffer overflowed. The sampled_overflow
+ * counter is updated accordingly, and if the 4-bit hardware counter wrapped
+ * around, 16 is added to correct the value.
+ *
+ * Return: True if overflowed.
+ */
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type,
+ unsigned int full_cnt)
+{
+ unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
+ bool overflow = false;
+
+ if (full_cnt != prev_full_cnt) {
+ overflow = true;
+
+ log->stats[type].overflow = full_cnt;
+ log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
+
+ if (full_cnt < prev_full_cnt) {
+ /* buffer_full_cnt is a 4 bit counter */
+ log->stats[type].sampled_overflow += 16;
+ }
+ xe_gt_notice(log_to_gt(log), "log buffer overflow\n");
+ }
+
+ return overflow;
+}
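For readers unfamiliar with the snapshot API introduced above, the following sketch (not part of the patch; the helper name and printer argument are assumptions) shows the expected capture/print/free lifecycle, mirroring what xe_guc_log_print() now does internally:

static void example_dump_guc_log(struct xe_guc_log *log, struct drm_printer *p,
				 bool in_atomic)
{
	struct xe_guc_log_snapshot *snapshot;

	/* Copies the log BO into kmalloc'ed chunks; uses GFP_ATOMIC when @in_atomic */
	snapshot = xe_guc_log_snapshot_capture(log, in_atomic);

	/* Both helpers tolerate a NULL snapshot, so no explicit error check is needed */
	xe_guc_log_snapshot_print(snapshot, p);
	xe_guc_log_snapshot_free(snapshot);
}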
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index 2d25ab28b4b3..5b896f5fafaf 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -7,8 +7,10 @@
#define _XE_GUC_LOG_H_
#include "xe_guc_log_types.h"
+#include "abi/guc_log_abi.h"
struct drm_printer;
+struct xe_device;
#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
#define CRASH_BUFFER_SIZE SZ_1M
@@ -17,7 +19,7 @@ struct drm_printer;
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
-#define CAPTURE_BUFFER_SIZE SZ_16K
+#define CAPTURE_BUFFER_SIZE SZ_1M
#endif
/*
* While we're using plain log level in i915, GuC controls are much more...
@@ -38,6 +40,10 @@ struct drm_printer;
int xe_guc_log_init(struct xe_guc_log *log);
void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+void xe_guc_log_print_dmesg(struct xe_guc_log *log);
+struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic);
+void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p);
+void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot);
static inline u32
xe_guc_log_get_level(struct xe_guc_log *log)
@@ -45,4 +51,11 @@ xe_guc_log_get_level(struct xe_guc_log *log)
return log->level;
}
+u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
+u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
+u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
+ enum guc_log_buffer_type type,
+ unsigned int full_cnt);
+
#endif
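As a rough illustration of how the newly exported size/offset helpers compose (a sketch assuming the default, non-CONFIG_DRM_XE_LARGE_GUC_BUFFER sizes shown above; the wrapper name is hypothetical):

static u32 example_capture_section_offset(struct xe_guc_log *log)
{
	/*
	 * The log-buffer state header occupies the first PAGE_SIZE bytes,
	 * followed by the crash (SZ_8K) and debug (SZ_64K) sections, so with
	 * the defaults this evaluates to PAGE_SIZE + SZ_8K + SZ_64K.
	 */
	return xe_guc_get_log_buffer_offset(log, GUC_LOG_BUFFER_CAPTURE);
}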
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
index 125080d138a7..b3d5c72ac752 100644
--- a/drivers/gpu/drm/xe/xe_guc_log_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -7,10 +7,38 @@
#define _XE_GUC_LOG_TYPES_H_
#include <linux/types.h>
+#include "abi/guc_log_abi.h"
+
+#include "xe_uc_fw_types.h"
struct xe_bo;
/**
+ * struct xe_guc_log_snapshot:
+ * Capture of the GuC log plus various state useful for decoding the log
+ */
+struct xe_guc_log_snapshot {
+ /** @size: Size in bytes of the @copy allocation */
+ size_t size;
+ /** @copy: Host memory copy of the log buffer for later dumping, split into chunks */
+ void **copy;
+ /** @num_chunks: Number of chunks within @copy */
+ int num_chunks;
+ /** @ktime: Kernel time the snapshot was taken */
+ u64 ktime;
+ /** @stamp: GuC timestamp at which the snapshot was taken */
+ u64 stamp;
+ /** @level: GuC log verbosity level */
+ u32 level;
+ /** @ver_found: GuC firmware version */
+ struct xe_uc_fw_version ver_found;
+ /** @ver_want: GuC firmware version that driver expected */
+ struct xe_uc_fw_version ver_want;
+ /** @path: Path of GuC firmware blob */
+ const char *path;
+};
+
+/**
* struct xe_guc_log - GuC log
*/
struct xe_guc_log {
@@ -18,6 +46,12 @@ struct xe_guc_log {
u32 level;
/** @bo: XE BO for GuC log */
struct xe_bo *bo;
+ /** @stats: logging related stats */
+ struct {
+ u32 sampled_overflow;
+ u32 overflow;
+ u32 flush;
+ } stats[GUC_LOG_BUFFER_TYPE_MAX];
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 034b29984d5e..e8b9faeaef64 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -262,7 +262,7 @@ static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
/* Allow/Disallow punit to process software freq requests */
- xe_mmio_write32(gt, RP_CONTROL, state);
+ xe_mmio_write32(&gt->mmio, RP_CONTROL, state);
}
static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
@@ -274,7 +274,7 @@ static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
/* Req freq is in units of 16.66 Mhz */
rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
- xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
+ xe_mmio_write32(&gt->mmio, RPNSWREQ, rpnswreq);
/* Sleep for a small time to allow pcode to respond */
usleep_range(100, 300);
@@ -334,9 +334,9 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
u32 reg;
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MPE_FREQUENCY);
else
- reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY);
pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
}
@@ -353,9 +353,9 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc)
* PCODE at a different register
*/
if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, FREQ_INFO_REC);
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
@@ -392,10 +392,10 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
/* When in RC6, actual frequency reported will be 0. */
if (GRAPHICS_VERx100(xe) >= 1270) {
- freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+ freq = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
} else {
- freq = xe_mmio_read32(gt, GT_PERF_STATUS);
+ freq = xe_mmio_read32(&gt->mmio, GT_PERF_STATUS);
freq = REG_FIELD_GET(CAGF_MASK, freq);
}
@@ -415,22 +415,24 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
- int ret;
+ unsigned int fw_ref;
/*
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
- *freq = xe_mmio_read32(gt, RPNSWREQ);
+ *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
*freq = decode_freq(*freq);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -480,6 +482,7 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
+ unsigned int fw_ref;
int ret;
mutex_lock(&pc->freq_lock);
@@ -493,9 +496,11 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
* GuC SLPC plays with min freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- goto out;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto fw;
+ }
ret = pc_action_query_task_state(pc);
if (ret)
@@ -504,7 +509,7 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
*freq = pc_get_min_freq(pc);
fw:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
mutex_unlock(&pc->freq_lock);
return ret;
@@ -612,10 +617,10 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
u32 reg, gt_c_state;
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
- reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
} else {
- reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+ reg = xe_mmio_read32(&gt->mmio, GT_CORE_STATUS);
gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
}
@@ -638,7 +643,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
- reg = xe_mmio_read32(gt, GT_GFX_RC6);
+ reg = xe_mmio_read32(&gt->mmio, GT_GFX_RC6);
return reg;
}
@@ -652,7 +657,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u64 reg;
- reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_MC6);
return reg;
}
@@ -665,9 +670,9 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
xe_device_assert_mem_access(pc_to_xe(pc));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIAP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, MTL_RP_STATE_CAP);
pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg));
@@ -683,9 +688,9 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
xe_device_assert_mem_access(pc_to_xe(pc));
if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, RP_STATE_CAP);
pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
@@ -855,6 +860,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
{
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
+ unsigned int fw_ref;
int ret = 0;
if (xe->info.skip_guc_pc)
@@ -864,13 +870,15 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (ret)
return ret;
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
xe_gt_idle_disable_c6(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -956,13 +964,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ unsigned int fw_ref;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
if (xe->info.skip_guc_pc) {
if (xe->info.platform != XE_PVC)
@@ -1005,7 +1016,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL);
out:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
}
@@ -1037,18 +1048,19 @@ static void xe_guc_pc_fini_hw(void *arg)
{
struct xe_guc_pc *pc = arg;
struct xe_device *xe = pc_to_xe(pc);
+ unsigned int fw_ref;
if (xe_device_wedged(xe))
return;
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
xe_guc_pc_gucrc_disable(pc);
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
- xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref);
}
/**
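The repeated conversions in this file follow one pattern: xe_force_wake_get() now returns a reference mask rather than an error code, the caller checks that the required domains were actually acquired, and whatever was acquired is always released. A minimal sketch of that pattern, assuming a hypothetical caller:

static int example_with_forcewake(struct xe_gt *gt)
{
	unsigned int fw_ref;

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		/* Drop whatever subset of domains was actually acquired */
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
		return -ETIMEDOUT;
	}

	/* ... access registers that require forcewake ... */

	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	return 0;
}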
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
index ade6162dc259..8f62de026724 100644
--- a/drivers/gpu/drm/xe/xe_guc_relay.c
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -5,6 +5,7 @@
#include <linux/bitfield.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <drm/drm_managed.h>
@@ -355,6 +356,7 @@ int xe_guc_relay_init(struct xe_guc_relay *relay)
return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
}
+ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */
static u32 to_relay_error(int err)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 4f5d00aea716..9a8564ea06b5 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -27,6 +27,7 @@
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
+#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
@@ -716,6 +717,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
+ struct dma_fence *fence = NULL;
bool lr = xe_exec_queue_is_lr(q);
xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
@@ -733,12 +735,12 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
if (lr) {
xe_sched_job_set_error(job, -EOPNOTSUPP);
- return NULL;
- } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
- return job->fence;
+ dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */
} else {
- return dma_fence_get(job->fence);
+ fence = job->fence;
}
+
+ return fence;
}
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
@@ -749,7 +751,7 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
xe_sched_job_put(job);
}
-static int guc_read_stopped(struct xe_guc *guc)
+int xe_guc_read_stopped(struct xe_guc *guc)
{
return atomic_read(&guc->submission_state.stopped);
}
@@ -771,7 +773,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
set_min_preemption_timeout(guc, q);
smp_rmb();
ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
struct xe_gpu_scheduler *sched = &q->guc->sched;
@@ -897,7 +899,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
*/
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
drm_warn(&xe->drm, "Schedule disable failed to respond");
xe_sched_submission_start(sched);
@@ -975,8 +977,8 @@ static void enable_scheduling(struct xe_exec_queue *q)
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc)) {
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc)) {
xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
set_exec_queue_banned(q);
xe_gt_reset_async(q->gt);
@@ -1031,6 +1033,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_guc *guc = exec_queue_to_guc(q);
const char *process_name = "no process";
+ struct xe_device *xe = guc_to_xe(guc);
+ unsigned int fw_ref;
int err = -ETIME;
pid_t pid = -1;
int i = 0;
@@ -1058,6 +1062,22 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
exec_queue_destroyed(q);
/*
+ * If a devcoredump has not been captured yet and the GuC capture for this
+ * job is not ready, do a manual capture first and decide later whether to
+ * use it.
+ */
+ if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
+ !xe_guc_capture_get_matching_and_lock(job)) {
+ /* take force wake before engine register manual capture */
+ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
+
+ xe_engine_snapshot_capture_for_job(job);
+
+ xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
+ }
+
+ /*
* XXX: Sampling timeout doesn't work in wedged mode as we have to
* modify scheduling state to read timestamp. We could read the
* timestamp from a register to accumulate current running time but this
@@ -1080,8 +1100,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
*/
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_enable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc))
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc))
goto trigger_reset;
/*
@@ -1105,8 +1125,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
smp_rmb();
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- guc_read_stopped(guc), HZ * 5);
- if (!ret || guc_read_stopped(guc)) {
+ xe_guc_read_stopped(guc), HZ * 5);
+ if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
if (!ret)
xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
@@ -1295,7 +1315,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q)
struct xe_device *xe = guc_to_xe(guc);
xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
- guc_read_stopped(guc));
+ xe_guc_read_stopped(guc));
xe_assert(xe, q->guc->suspend_pending);
__suspend_fence_signal(q);
@@ -1309,9 +1329,9 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
exec_queue_enabled(q)) {
wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
- guc_read_stopped(guc));
+ xe_guc_read_stopped(guc));
- if (!guc_read_stopped(guc)) {
+ if (!xe_guc_read_stopped(guc)) {
s64 since_resume_ms =
ktime_ms_delta(ktime_get(),
q->guc->resume_time);
@@ -1435,7 +1455,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
q->entity = &ge->entity;
- if (guc_read_stopped(guc))
+ if (xe_guc_read_stopped(guc))
xe_sched_stop(sched);
mutex_unlock(&guc->submission_state.lock);
@@ -1591,7 +1611,7 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
!READ_ONCE(q->guc->suspend_pending) ||
exec_queue_killed(q) ||
- guc_read_stopped(guc),
+ xe_guc_read_stopped(guc),
HZ * 5);
if (!ret) {
@@ -1717,7 +1737,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
- !guc_read_stopped(guc));
+ !xe_guc_read_stopped(guc));
}
void xe_guc_submit_stop(struct xe_guc *guc)
@@ -1726,7 +1746,7 @@ void xe_guc_submit_stop(struct xe_guc *guc)
unsigned long index;
struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, guc_read_stopped(guc) == 1);
+ xe_assert(xe, xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
@@ -1770,7 +1790,7 @@ int xe_guc_submit_start(struct xe_guc *guc)
unsigned long index;
struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, guc_read_stopped(guc) == 1);
+ xe_assert(xe, xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
@@ -1949,8 +1969,6 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
- /* FIXME: Do error capture, most likely async */
-
trace_xe_exec_queue_reset(q);
/*
@@ -1966,6 +1984,36 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
+/*
+ * xe_guc_error_capture_handler - Handler for the GuC state-capture notification
+ * @guc: The GuC object
+ * @msg: Pointer to the message
+ * @len: The message length
+ *
+ * When the captured data is ready, the GuC sends an
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host. This function
+ * is called first to check the status before processing the data that comes
+ * with the message.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ u32 status;
+
+ if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) {
+ xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+ if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+ xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
+
+ xe_guc_capture_process(guc);
+
+ return 0;
+}
+
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len)
{
@@ -2180,7 +2228,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
if (!snapshot)
return;
- drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+ drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
drm_printf(p, "\tName: %s\n", snapshot->name);
drm_printf(p, "\tClass: %d\n", snapshot->class);
drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index bdf8c9f3d24a..9b71a986c6ca 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -20,12 +20,14 @@ void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);
+int xe_guc_read_stopped(struct xe_guc *guc);
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len);
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index ed150fc09ad0..fa75f57bf5da 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -58,6 +58,8 @@ struct xe_guc {
struct xe_guc_ads ads;
/** @ct: GuC ct */
struct xe_guc_ct ct;
+ /** @capture: the error-state-capture module's data and objects */
+ struct xe_guc_state_capture *capture;
/** @pc: GuC Power Conservation */
struct xe_guc_pc pc;
/** @dbm: GuC Doorbell Manager */
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index f5459f97af23..6a846e4cb221 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -229,7 +229,7 @@ bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type)
{
struct xe_gt *gt = huc_to_gt(huc);
- return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
+ return xe_mmio_read32(&gt->mmio, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
}
int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
@@ -268,7 +268,7 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
goto fail;
}
- ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val,
+ ret = xe_mmio_wait32(&gt->mmio, huc_auth_modes[type].reg, huc_auth_modes[type].val,
huc_auth_modes[type].val, 100000, NULL, false);
if (ret) {
xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret));
@@ -296,19 +296,19 @@ void xe_huc_sanitize(struct xe_huc *huc)
void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
{
struct xe_gt *gt = huc_to_gt(huc);
- int err;
+ unsigned int fw_ref;
xe_uc_fw_print(&huc->fw, p);
if (!xe_uc_fw_is_enabled(&huc->fw))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
drm_printf(p, "\nHuC status: 0x%08x\n",
- xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO));
+ xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO));
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c9c3beb3ce8d..1557acee3523 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -12,6 +12,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
@@ -23,6 +24,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
+#include "xe_guc_capture.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
@@ -295,7 +297,7 @@ void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
reg.addr += hwe->mmio_base;
- xe_mmio_write32(hwe->gt, reg, val);
+ xe_mmio_write32(&hwe->gt->mmio, reg, val);
}
/**
@@ -315,7 +317,7 @@ u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
reg.addr += hwe->mmio_base;
- return xe_mmio_read32(hwe->gt, reg);
+ return xe_mmio_read32(&hwe->gt->mmio, reg);
}
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
@@ -324,7 +326,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
- xe_mmio_write32(hwe->gt, RCU_MODE,
+ xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
@@ -354,7 +356,7 @@ static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
hwe->class != XE_ENGINE_CLASS_RENDER)
return false;
- return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
+ return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}
void
@@ -460,6 +462,30 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}
+static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
+{
+ const struct engine_info *info;
+ enum xe_hw_engine_id id;
+
+ for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+ info = &engine_infos[id];
+ if (info->class == class && info->instance == instance)
+ return info;
+ }
+
+ return NULL;
+}
+
+static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
+{
+ /* For MSI-X, hw engines report at the IRQ offset of engine instance zero */
+ const struct engine_info *info = find_engine_info(class, 0);
+
+ xe_gt_assert(gt, info);
+
+ return info ? info->irq_offset : 0;
+}
+
static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
enum xe_hw_engine_id id)
{
@@ -479,7 +505,9 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
hwe->class = info->class;
hwe->instance = info->instance;
hwe->mmio_base = info->mmio_base;
- hwe->irq_offset = info->irq_offset;
+ hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
+ get_msix_irq_offset(gt, info->class) :
+ info->irq_offset;
hwe->domain = info->domain;
hwe->name = info->name;
hwe->fence_irq = &gt->fence_irq[info->class];
@@ -612,7 +640,7 @@ static void read_media_fuses(struct xe_gt *gt)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
+ media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);
/*
* Pre-Xe_HP platforms had register bits representing absent engines,
@@ -657,7 +685,7 @@ static void read_copy_fuses(struct xe_gt *gt)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
+ bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
/* BCS0 is always present; only BCS1-BCS8 may be fused off */
@@ -704,7 +732,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
u32 ccs_mask;
- ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
+ ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
@@ -742,8 +770,8 @@ static void check_gsc_availability(struct xe_gt *gt)
gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
/* interrupts where previously enabled, so turn them off */
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
- xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
+ xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
+ xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
}
@@ -798,60 +826,10 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
xe_hw_fence_irq_run(hwe->fence_irq);
}
-static bool
-is_slice_common_per_gslice(struct xe_device *xe)
-{
- return GRAPHICS_VERx100(xe) >= 1255;
-}
-
-static void
-xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
- struct xe_hw_engine_snapshot *snapshot)
-{
- struct xe_gt *gt = hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- unsigned int dss;
- u16 group, instance;
-
- snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
-
- if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
- return;
-
- if (is_slice_common_per_gslice(xe) == false) {
- snapshot->reg.instdone.slice_common[0] =
- xe_mmio_read32(gt, SC_INSTDONE);
- snapshot->reg.instdone.slice_common_extra[0] =
- xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
- snapshot->reg.instdone.slice_common_extra2[0] =
- xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
- } else {
- for_each_geometry_dss(dss, gt, group, instance) {
- snapshot->reg.instdone.slice_common[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
- snapshot->reg.instdone.slice_common_extra[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
- snapshot->reg.instdone.slice_common_extra2[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
- }
- }
-
- for_each_geometry_dss(dss, gt, group, instance) {
- snapshot->reg.instdone.sampler[dss] =
- xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
- snapshot->reg.instdone.row[dss] =
- xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
-
- if (GRAPHICS_VERx100(xe) >= 1255)
- snapshot->reg.instdone.geom_svg[dss] =
- xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
- group, instance);
- }
-}
-
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
+ * @job: The job object (may be NULL).
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -860,11 +838,10 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
* caller, using `xe_hw_engine_snapshot_free`.
*/
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
{
struct xe_hw_engine_snapshot *snapshot;
- size_t len;
- u64 val;
+ struct __guc_capture_parsed_output *node;
if (!xe_hw_engine_is_valid(hwe))
return NULL;
@@ -874,28 +851,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
if (!snapshot)
return NULL;
- /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
- * includes xe_hw_engine_types.h the length of this 3 registers can't be
- * set in struct xe_hw_engine_snapshot, so here doing additional
- * allocations.
- */
- len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
- snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
- snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
- if (!snapshot->reg.instdone.slice_common ||
- !snapshot->reg.instdone.slice_common_extra ||
- !snapshot->reg.instdone.slice_common_extra2 ||
- !snapshot->reg.instdone.sampler ||
- !snapshot->reg.instdone.row ||
- !snapshot->reg.instdone.geom_svg) {
- xe_hw_engine_snapshot_free(snapshot);
- return NULL;
- }
-
snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
snapshot->hwe = hwe;
snapshot->logical_instance = hwe->logical_instance;
@@ -903,157 +858,32 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
hwe->domain);
snapshot->mmio_base = hwe->mmio_base;
+ snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);
/* no more VF accessible data below this point */
if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
return snapshot;
- snapshot->reg.ring_execlist_status =
- xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
- snapshot->reg.ring_execlist_status |= val << 32;
-
- snapshot->reg.ring_execlist_sq_contents =
- xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
- snapshot->reg.ring_execlist_sq_contents |= val << 32;
-
- snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
- snapshot->reg.ring_acthd |= val << 32;
-
- snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
- snapshot->reg.ring_bbaddr |= val << 32;
-
- snapshot->reg.ring_dma_fadd =
- xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
- val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
- snapshot->reg.ring_dma_fadd |= val << 32;
-
- snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
- snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
- snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
- if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
- val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
- snapshot->reg.ring_start |= val << 32;
- }
- if (xe_gt_has_indirect_ring_state(hwe->gt)) {
- snapshot->reg.indirect_ring_state =
- xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
- }
-
- snapshot->reg.ring_head =
- xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
- snapshot->reg.ring_tail =
- xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
- snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
- snapshot->reg.ring_mi_mode =
- xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
- snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
- snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
- snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
- snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
- snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
- snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
- xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
-
- if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
-
- return snapshot;
-}
-
-static void
-xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
-{
- struct xe_gt *gt = snapshot->hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- u16 group, instance;
- unsigned int dss;
-
- drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
-
- if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
- return;
-
- if (is_slice_common_per_gslice(xe) == false) {
- drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
- snapshot->reg.instdone.slice_common[0]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
- snapshot->reg.instdone.slice_common_extra[0]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
- snapshot->reg.instdone.slice_common_extra2[0]);
- } else {
- for_each_geometry_dss(dss, gt, group, instance) {
- drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.slice_common[dss]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.slice_common_extra[dss]);
- drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.slice_common_extra2[dss]);
+ if (job) {
+ /* If a GuC capture is available for this job, use it as the source */
+ node = xe_guc_capture_get_matching_and_lock(job);
+ if (node) {
+ struct xe_device *xe = gt_to_xe(hwe->gt);
+ struct xe_devcoredump *coredump = &xe->devcoredump;
+
+ coredump->snapshot.matched_node = node;
+ snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
+ xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
+ return snapshot;
}
}
- for_each_geometry_dss(dss, gt, group, instance) {
- drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.sampler[dss]);
- drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
- snapshot->reg.instdone.row[dss]);
-
- if (GRAPHICS_VERx100(xe) >= 1255)
- drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
- dss, snapshot->reg.instdone.geom_svg[dss]);
- }
-}
+ /* otherwise, do manual capture */
+ xe_engine_manual_capture(hwe, snapshot);
+ snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
+ xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");
-/**
- * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
- * @snapshot: Xe HW Engine snapshot object.
- * @p: drm_printer where it will be printed out.
- *
- * This function prints out a given Xe HW Engine snapshot object.
- */
-void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
- struct drm_printer *p)
-{
- if (!snapshot)
- return;
-
- drm_printf(p, "%s (physical), logical instance=%d\n",
- snapshot->name ? snapshot->name : "",
- snapshot->logical_instance);
- drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
- snapshot->forcewake.domain, snapshot->forcewake.ref);
- drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
- drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
- drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
- snapshot->reg.ring_execlist_status);
- drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
- snapshot->reg.ring_execlist_sq_contents);
- drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
- drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
- drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
- drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
- drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
- drm_printf(p, "\tRING_MODE: 0x%08x\n",
- snapshot->reg.ring_mode);
- drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
- drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
- drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
- drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
- drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
- drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
- drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
- drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
- snapshot->reg.indirect_ring_state);
- drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
- xe_hw_engine_snapshot_instdone_print(snapshot, p);
-
- if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- drm_printf(p, "\tRCU_MODE: 0x%08x\n",
- snapshot->reg.rcu_mode);
- drm_puts(p, "\n");
+ return snapshot;
}
/**
@@ -1065,15 +895,18 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
*/
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
+ struct xe_gt *gt;
if (!snapshot)
return;
- kfree(snapshot->reg.instdone.slice_common);
- kfree(snapshot->reg.instdone.slice_common_extra);
- kfree(snapshot->reg.instdone.slice_common_extra2);
- kfree(snapshot->reg.instdone.sampler);
- kfree(snapshot->reg.instdone.row);
- kfree(snapshot->reg.instdone.geom_svg);
+ gt = snapshot->hwe->gt;
+ /*
+ * xe_guc_capture_put_matched_nodes is called here and from
+ * xe_devcoredump_snapshot_free, to cover the two paths by which
+ * hw engine snapshots are freed - debugfs and devcoredump.
+ */
+ xe_guc_capture_put_matched_nodes(&gt->uc.guc);
+
kfree(snapshot->name);
kfree(snapshot);
}
@@ -1089,8 +922,8 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
struct xe_hw_engine_snapshot *snapshot;
- snapshot = xe_hw_engine_snapshot_capture(hwe);
- xe_hw_engine_snapshot_print(snapshot, p);
+ snapshot = xe_hw_engine_snapshot_capture(hwe, NULL);
+ xe_engine_snapshot_print(snapshot, p);
xe_hw_engine_snapshot_free(snapshot);
}
@@ -1150,7 +983,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
- return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
+ return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
}
enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
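To summarize the new capture flow for callers (an illustrative sketch, not part of the patch; the wrapper name is hypothetical, and xe_engine_snapshot_print() is the GuC-capture print helper used by xe_hw_engine_print() above):

static void example_dump_engine_state(struct xe_hw_engine *hwe,
				      struct xe_sched_job *job,
				      struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	/*
	 * With a NULL @job (debugfs path) this always does a manual register
	 * capture; with a real job it may instead latch a matching GuC
	 * error-capture node.
	 */
	snapshot = xe_hw_engine_snapshot_capture(hwe, job);

	xe_engine_snapshot_print(snapshot, p);

	/* Also releases any matched GuC capture node */
	xe_hw_engine_snapshot_free(snapshot);
}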
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 022819a4a8eb..da0a6922a26f 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -11,6 +11,7 @@
struct drm_printer;
struct drm_xe_engine_class_instance;
struct xe_device;
+struct xe_sched_job;
#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
@@ -54,12 +55,9 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
enum xe_engine_class engine_class);
-
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe);
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job);
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
-void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
- struct drm_printer *p);
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 8be6d420ece4..719f27ef00a5 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -152,6 +152,11 @@ struct xe_hw_engine {
struct xe_hw_engine_group *hw_engine_group;
};
+enum xe_hw_engine_snapshot_source_id {
+ XE_ENGINE_CAPTURE_SOURCE_MANUAL,
+ XE_ENGINE_CAPTURE_SOURCE_GUC
+};
+
/**
* struct xe_hw_engine_snapshot - Hardware engine snapshot
*
@@ -160,6 +165,8 @@ struct xe_hw_engine {
struct xe_hw_engine_snapshot {
/** @name: name of the hw engine */
char *name;
+ /** @source: Data source, either manual or GuC */
+ enum xe_hw_engine_snapshot_source_id source;
/** @hwe: hw engine */
struct xe_hw_engine *hwe;
/** @logical_instance: logical instance of this hw engine */
@@ -173,65 +180,8 @@ struct xe_hw_engine_snapshot {
} forcewake;
/** @mmio_base: MMIO base address of this hw engine*/
u32 mmio_base;
- /** @reg: Useful MMIO register snapshot */
- struct {
- /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */
- u64 ring_execlist_status;
- /** @reg.ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */
- u64 ring_execlist_sq_contents;
- /** @reg.ring_acthd: RING_ACTHD */
- u64 ring_acthd;
- /** @reg.ring_bbaddr: RING_BBADDR */
- u64 ring_bbaddr;
- /** @reg.ring_dma_fadd: RING_DMA_FADD */
- u64 ring_dma_fadd;
- /** @reg.ring_hwstam: RING_HWSTAM */
- u32 ring_hwstam;
- /** @reg.ring_hws_pga: RING_HWS_PGA */
- u32 ring_hws_pga;
- /** @reg.ring_start: RING_START */
- u64 ring_start;
- /** @reg.ring_head: RING_HEAD */
- u32 ring_head;
- /** @reg.ring_tail: RING_TAIL */
- u32 ring_tail;
- /** @reg.ring_ctl: RING_CTL */
- u32 ring_ctl;
- /** @reg.ring_mi_mode: RING_MI_MODE */
- u32 ring_mi_mode;
- /** @reg.ring_mode: RING_MODE */
- u32 ring_mode;
- /** @reg.ring_imr: RING_IMR */
- u32 ring_imr;
- /** @reg.ring_esr: RING_ESR */
- u32 ring_esr;
- /** @reg.ring_emr: RING_EMR */
- u32 ring_emr;
- /** @reg.ring_eir: RING_EIR */
- u32 ring_eir;
- /** @reg.indirect_ring_state: INDIRECT_RING_STATE */
- u32 indirect_ring_state;
- /** @reg.ipehr: IPEHR */
- u32 ipehr;
- /** @reg.rcu_mode: RCU_MODE */
- u32 rcu_mode;
- struct {
- /** @reg.instdone.ring: RING_INSTDONE */
- u32 ring;
- /** @reg.instdone.slice_common: SC_INSTDONE */
- u32 *slice_common;
- /** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
- u32 *slice_common_extra;
- /** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
- u32 *slice_common_extra2;
- /** @reg.instdone.sampler: SAMPLER_INSTDONE */
- u32 *sampler;
- /** @reg.instdone.row: ROW_INSTDONE */
- u32 *row;
- /** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
- u32 *geom_svg;
- } instdone;
- } reg;
+ /** @kernel_reserved: Engine reserved, can't be used by userspace */
+ bool kernel_reserved;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index aa11728e7e79..fde56dad3ab7 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -149,7 +149,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v
u64 reg_val, min, max;
struct xe_device *xe = hwmon->xe;
struct xe_reg rapl_limit, pkg_power_sku;
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
@@ -190,7 +190,7 @@ unlock:
static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
int ret = 0;
u64 reg_val;
struct xe_reg rapl_limit;
@@ -222,7 +222,7 @@ unlock:
static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
u64 reg_val;
@@ -259,7 +259,7 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l
static void
xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
u64 reg_val;
@@ -282,7 +282,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
char *buf)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, x_w = 2; /* 2 bits */
u64 r, tau4, out;
int sensor_index = to_sensor_dev_attr(attr)->index;
@@ -323,7 +323,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
const char *buf, size_t count)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, rxy, x_w = 2; /* 2 bits */
u64 tau4, r, max_win;
unsigned long val;
@@ -498,7 +498,7 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u64 reg_val;
reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel));
@@ -781,7 +781,7 @@ static const struct hwmon_chip_info hwmon_chip_info = {
static void
xe_hwmon_get_preregistration_info(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_hwmon *hwmon = xe->hwmon;
long energy;
u64 val_sku_unit = 0;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 5f2c368c35ad..b7995ebd54ab 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -10,8 +10,7 @@
#include <drm/drm_managed.h>
#include "display/xe_display.h"
-#include "regs/xe_gt_regs.h"
-#include "regs/xe_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_device.h"
#include "xe_drv.h"
#include "xe_gsc_proxy.h"
@@ -30,14 +29,14 @@
#define IIR(offset) XE_REG(offset + 0x8)
#define IER(offset) XE_REG(offset + 0xc)
-static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
+static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg)
{
u32 val = xe_mmio_read32(mmio, reg);
if (val == 0)
return;
- drm_WARN(&gt_to_xe(mmio)->drm, 1,
+ drm_WARN(&mmio->tile->xe->drm, 1,
"Interrupt register 0x%x is not zero: 0x%08x\n",
reg.addr, val);
xe_mmio_write32(mmio, reg, 0xffffffff);
@@ -52,7 +51,7 @@ static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
*/
static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
/*
* If we're just enabling an interrupt now, it shouldn't already
@@ -70,7 +69,7 @@ static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
/* Mask and disable all interrupts. */
static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
xe_mmio_write32(mmio, IMR(irqregs), ~0);
/* Posting read */
@@ -87,7 +86,7 @@ static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
static u32 xelp_intr_disable(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0);
@@ -103,7 +102,7 @@ static u32 xelp_intr_disable(struct xe_device *xe)
static u32
gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u32 iir;
if (!(master_ctl & GU_MISC_IRQ))
@@ -118,7 +117,7 @@ gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ);
if (stall)
@@ -129,12 +128,13 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
void xe_irq_enable_hwe(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ struct xe_mmio *mmio = &gt->mmio;
u32 ccs_mask, bcs_mask;
u32 irqs, dmask, smask;
u32 gsc_mask = 0;
u32 heci_mask = 0;
- if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe))
+ if (xe_device_uses_memirq(xe))
return;
if (xe_device_uc_enabled(xe)) {
@@ -155,35 +155,35 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
if (!xe_gt_is_media_type(gt)) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
if (ccs_mask)
- xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+ xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask);
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
- xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+ xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask);
+ xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask);
if (bcs_mask & (BIT(1)|BIT(2)))
- xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(3)|BIT(4)))
- xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(5)|BIT(6)))
- xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(7)|BIT(8)))
- xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(0)|BIT(1)))
- xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask);
}
if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask);
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
- xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
- xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask);
/*
* the heci2 interrupt is enabled via the same register as the
@@ -197,17 +197,17 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
}
if (gsc_mask) {
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
- xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
+ xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
+ xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~gsc_mask);
}
if (heci_mask)
- xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
+ xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
}
}
static u32
gt_engine_identity(struct xe_device *xe,
- struct xe_gt *mmio,
+ struct xe_mmio *mmio,
const unsigned int bank,
const unsigned int bit)
{
@@ -279,7 +279,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
return tile->media_gt;
default:
break;
- };
+ }
fallthrough;
default:
return tile->primary_gt;
@@ -291,7 +291,7 @@ static void gt_irq_handler(struct xe_tile *tile,
u32 *identity)
{
struct xe_device *xe = tile_to_xe(tile);
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
unsigned int bank, bit;
u16 instance, intr_vec;
enum xe_engine_class class;
@@ -376,7 +376,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
static u32 dg1_intr_disable(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u32 val;
/* First disable interrupts */
@@ -394,7 +394,7 @@ static u32 dg1_intr_disable(struct xe_device *xe)
static void dg1_intr_enable(struct xe_device *xe, bool stall)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
if (stall)
@@ -431,7 +431,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
}
for_each_tile(tile, xe, id) {
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0)
continue;
@@ -474,7 +474,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
static void gt_irq_reset(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
XE_ENGINE_CLASS_COMPUTE);
@@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile)
if (ccs_mask & (BIT(0)|BIT(1)))
xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
if ((tile->media_gt &&
xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
@@ -547,7 +547,7 @@ static void dg1_irq_reset(struct xe_tile *tile)
static void dg1_irq_reset_mstr(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
}
@@ -566,7 +566,7 @@ static void vf_irq_reset(struct xe_device *xe)
for_each_tile(tile, xe, id) {
if (xe_device_has_memirq(xe))
- xe_memirq_reset(&tile->sriov.vf.memirq);
+ xe_memirq_reset(&tile->memirq);
else
gt_irq_reset(tile);
}
@@ -609,7 +609,7 @@ static void vf_irq_postinstall(struct xe_device *xe)
for_each_tile(tile, xe, id)
if (xe_device_has_memirq(xe))
- xe_memirq_postinstall(&tile->sriov.vf.memirq);
+ xe_memirq_postinstall(&tile->memirq);
if (GRAPHICS_VERx100(xe) < 1210)
xelp_intr_enable(xe, true);
@@ -652,7 +652,7 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
spin_unlock(&xe->irq.lock);
for_each_tile(tile, xe, id)
- xe_memirq_handler(&tile->sriov.vf.memirq);
+ xe_memirq_handler(&tile->memirq);
return IRQ_HANDLED;
}
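The xe_irq.c hunks above replace the open-coded IS_SRIOV_VF(xe) && xe_device_has_memirq(xe) test with xe_device_uses_memirq(), reflecting that memory based interrupts are no longer treated as a VF-only feature. The helper's actual body is defined outside this diff; the sketch below is only a guess at its shape, folding the old VF test together with the MSI-X case that the xe_memirq.c changes below cater for (the _sketch suffix marks it as hypothetical):

/*
 * Hypothetical sketch, not the in-tree definition: xe_device_uses_memirq()
 * lives outside this diff and is assumed to combine the old SR-IOV VF check
 * with the new MSI-X case.
 */
static inline bool xe_device_uses_memirq_sketch(struct xe_device *xe)
{
	return xe_device_has_memirq(xe) &&
	       (IS_SRIOV_VF(xe) || xe_device_has_msix(xe));
}
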
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 8999ac511555..a60ceae4c6dd 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -193,7 +193,7 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
- xe_mmio_write32(tile->primary_gt,
+ xe_mmio_write32(&tile->mmio,
GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
}
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index aec7db39c061..4f64c7f4e68d 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -38,24 +38,6 @@
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-struct xe_lrc_snapshot {
- struct xe_bo *lrc_bo;
- void *lrc_snapshot;
- unsigned long lrc_size, lrc_offset;
-
- u32 context_desc;
- u32 indirect_context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
- u32 ctx_timestamp;
- u32 ctx_job_timestamp;
-};
-
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
@@ -599,10 +581,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
+ struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
struct xe_device *xe = gt_to_xe(hwe->gt);
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
+ if (!xe_device_uses_memirq(xe))
return;
regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
@@ -613,9 +595,9 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
+ regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
+ regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
}
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index c24542e89318..40d8f6906d3e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -17,9 +17,26 @@ enum xe_engine_class;
struct xe_gt;
struct xe_hw_engine;
struct xe_lrc;
-struct xe_lrc_snapshot;
struct xe_vm;
+struct xe_lrc_snapshot {
+ struct xe_bo *lrc_bo;
+ void *lrc_snapshot;
+ unsigned long lrc_size, lrc_offset;
+
+ u32 context_desc;
+ u32 indirect_context_desc;
+ u32 head;
+ struct {
+ u32 internal;
+ u32 memory;
+ } tail;
+ u32 start_seqno;
+ u32 seqno;
+ u32 ctx_timestamp;
+ u32 ctx_job_timestamp;
+};
+
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 95b6e9d7b7db..f833da88150a 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -5,8 +5,8 @@
#include <drm/drm_managed.h>
-#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
+#include "regs/xe_irq_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
@@ -19,15 +19,25 @@
#include "xe_hw_engine.h"
#include "xe_map.h"
#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_sriov_printk.h"
#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition)
-#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg)
+#define memirq_printk(m, _level, _fmt, ...) \
+ drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \
+ memirq_to_tile(m)->id, ##__VA_ARGS__)
+
+#ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ
+#define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__)
+#else
+#define memirq_debug(...)
+#endif
+
+#define memirq_err(m, _fmt, ...) memirq_printk(m, err, _fmt, ##__VA_ARGS__)
+#define memirq_err_ratelimited(m, _fmt, ...) \
+ memirq_printk(m, err_ratelimited, _fmt, ##__VA_ARGS__)
static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq)
{
- return container_of(memirq, struct xe_tile, sriov.vf.memirq);
+ return container_of(memirq, struct xe_tile, memirq);
}
static struct xe_device *memirq_to_xe(struct xe_memirq *memirq)
@@ -105,6 +115,44 @@ static const char *guc_name(struct xe_guc *guc)
* | |
* | |
* +-----------+
+ *
+ *
+ * MSI-X use case
+ *
+ * When using MSI-X, hw engines report interrupt status and source to engine
+ * instance 0. For this scenario, in order to differentiate between the
+ * engines, we need to pass different status/source pointers in the LRC.
+ *
+ * The requirements on those pointers are:
+ * - Interrupt status should be 4KiB aligned
+ * - Interrupt source should be 64-byte aligned
+ *
+ * To accommodate this, we duplicate the memirq page layout above -
+ * allocating a page for each engine instance and passing this page in the LRC.
+ * Note that the same page can be reused for different engine types.
+ * For example, an LRC executing on CCS #x will have pointers to page #x,
+ * and an LRC executing on BCS #x will have the same pointers.
+ *
+ * ::
+ *
+ * 0x0000 +==============================+ <== page for instance 0 (BCS0, CCS0, etc.)
+ * | Interrupt Status Report Page |
+ * 0x0400 +==============================+
+ * | Interrupt Source Report Page |
+ * 0x0440 +==============================+
+ * | Interrupt Enable Mask |
+ * +==============================+
+ * | Not used |
+ * 0x1000 +==============================+ <== page for instance 1 (BCS1, CCS1, etc.)
+ * | Interrupt Status Report Page |
+ * 0x1400 +==============================+
+ * | Interrupt Source Report Page |
+ * 0x1440 +==============================+
+ * | Not used |
+ * 0x2000 +==============================+ <== page for instance 2 (BCS2, CCS2, etc.)
+ * | ... |
+ * +==============================+
+ *
*/
static void __release_xe_bo(struct drm_device *drm, void *arg)
@@ -114,18 +162,30 @@ static void __release_xe_bo(struct drm_device *drm, void *arg)
xe_bo_unpin_map_no_vm(bo);
}
+static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq)
+{
+ /*
+ * When the HW engines are configured to use MSI-X,
+ * they report interrupt status and source to the offset of
+ * engine instance 0.
+ */
+ return xe_device_has_msix(memirq_to_xe(memirq));
+}
+
static int memirq_alloc_pages(struct xe_memirq *memirq)
{
struct xe_device *xe = memirq_to_xe(memirq);
struct xe_tile *tile = memirq_to_tile(memirq);
+ size_t bo_size = hw_reports_to_instance_zero(memirq) ?
+ XE_HW_ENGINE_MAX_INSTANCE * SZ_4K : SZ_4K;
struct xe_bo *bo;
int err;
- BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64));
- BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K));
/* XXX: convert to managed bo */
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
@@ -140,25 +200,25 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
memirq_assert(memirq, !xe_bo_is_vram(bo));
memirq_assert(memirq, !memirq->bo);
- iosys_map_memset(&bo->vmap, 0, 0, SZ_4K);
+ iosys_map_memset(&bo->vmap, 0, 0, bo_size);
memirq->bo = bo;
- memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET);
- memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET);
+ memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET(0));
+ memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET(0));
memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET);
memirq_assert(memirq, !memirq->source.is_iomem);
memirq_assert(memirq, !memirq->status.is_iomem);
memirq_assert(memirq, !memirq->mask.is_iomem);
- memirq_debug(memirq, "page offsets: source %#x status %#x\n",
- xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq));
+ memirq_debug(memirq, "page offsets: bo %#x bo_size %zu source %#x status %#x\n",
+ xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0),
+ XE_MEMIRQ_STATUS_OFFSET(0));
return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
out:
- xe_sriov_err(memirq_to_xe(memirq),
- "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
+ memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
return err;
}
@@ -178,9 +238,7 @@ static void memirq_set_enable(struct xe_memirq *memirq, bool enable)
*
* These allocations are managed and will be implicitly released on unload.
*
- * Note: This function shall be called only by the VF driver.
- *
- * If this function fails then VF driver won't be able to operate correctly.
+ * If this function fails then the driver won't be able to operate correctly.
* If `Memory Based Interrupts`_ are not used this function will return 0.
*
* Return: 0 on success or a negative error code on failure.
@@ -190,9 +248,7 @@ int xe_memirq_init(struct xe_memirq *memirq)
struct xe_device *xe = memirq_to_xe(memirq);
int err;
- memirq_assert(memirq, IS_SRIOV_VF(xe));
-
- if (!xe_device_has_memirq(xe))
+ if (!xe_device_uses_memirq(xe))
return 0;
err = memirq_alloc_pages(memirq);
@@ -205,55 +261,70 @@ int xe_memirq_init(struct xe_memirq *memirq)
return 0;
}
+static u32 __memirq_source_page(struct xe_memirq *memirq, u16 instance)
+{
+ memirq_assert(memirq, instance <= XE_HW_ENGINE_MAX_INSTANCE);
+ memirq_assert(memirq, memirq->bo);
+
+ instance = hw_reports_to_instance_zero(memirq) ? instance : 0;
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET(instance);
+}
+
/**
* xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_.
* @memirq: the &xe_memirq to query
+ * @hwe: the hw engine for which we want the report page
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the `Interrupt Source Report Page`_.
*/
-u32 xe_memirq_source_ptr(struct xe_memirq *memirq)
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
+
+ return __memirq_source_page(memirq, hwe->instance);
+}
+
+static u32 __memirq_status_page(struct xe_memirq *memirq, u16 instance)
+{
+ memirq_assert(memirq, instance <= XE_HW_ENGINE_MAX_INSTANCE);
memirq_assert(memirq, memirq->bo);
- return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET;
+ instance = hw_reports_to_instance_zero(memirq) ? instance : 0;
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET(instance);
}
/**
* xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_.
* @memirq: the &xe_memirq to query
+ * @hwe: the hw engine for which we want the report page
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the `Interrupt Status Report Page`_.
*/
-u32 xe_memirq_status_ptr(struct xe_memirq *memirq)
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
- memirq_assert(memirq, memirq->bo);
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
- return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET;
+ return __memirq_status_page(memirq, hwe->instance);
}
/**
* xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask.
* @memirq: the &xe_memirq to query
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the Interrupt Enable Mask.
*/
u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
memirq_assert(memirq, memirq->bo);
return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET;
@@ -267,7 +338,7 @@ u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
* Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
* to be used by the GuC when `Memory Based Interrupts`_ are required.
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: 0 on success or a negative error code on failure.
@@ -279,12 +350,10 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
u32 source, status;
int err;
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
- memirq_assert(memirq, memirq->bo);
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
- source = xe_memirq_source_ptr(memirq) + offset;
- status = xe_memirq_status_ptr(memirq) + offset * SZ_16;
+ source = __memirq_source_page(memirq, 0) + offset;
+ status = __memirq_status_page(memirq, 0) + offset * SZ_16;
err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY,
source);
@@ -299,9 +368,8 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
return 0;
failed:
- xe_sriov_err(memirq_to_xe(memirq),
- "Failed to setup report pages in %s (%pe)\n",
- guc_name(guc), ERR_PTR(err));
+ memirq_err(memirq, "Failed to setup report pages in %s (%pe)\n",
+ guc_name(guc), ERR_PTR(err));
return err;
}
@@ -311,13 +379,12 @@ failed:
*
* This is part of the driver IRQ setup flow.
*
- * This function shall only be used by the VF driver on platforms that use
+ * This function shall only be used on platforms that use
* `Memory Based Interrupts`_.
*/
void xe_memirq_reset(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
if (memirq->bo)
memirq_set_enable(memirq, false);
@@ -329,13 +396,12 @@ void xe_memirq_reset(struct xe_memirq *memirq)
*
* This is part of the driver IRQ setup flow.
*
- * This function shall only be used by the VF driver on platforms that use
+ * This function shall only be used on platforms that use
* `Memory Based Interrupts`_.
*/
void xe_memirq_postinstall(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
if (memirq->bo)
memirq_set_enable(memirq, true);
@@ -349,9 +415,9 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
value = iosys_map_rd(vector, offset, u8);
if (value) {
if (value != 0xff)
- xe_sriov_err_ratelimited(memirq_to_xe(memirq),
- "Unexpected memirq value %#x from %s at %u\n",
- value, name, offset);
+ memirq_err_ratelimited(memirq,
+ "Unexpected memirq value %#x from %s at %u\n",
+ value, name, offset);
iosys_map_wr(vector, offset, u8, 0x00);
}
@@ -379,6 +445,28 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
}
/**
+ * xe_memirq_hwe_handler - Check and process interrupts for a specific HW engine.
+ * @memirq: the &xe_memirq
+ * @hwe: the hw engine to process
+ *
+ * This function reads and dispatches `Memory Based Interrupts`_ for the provided HW engine.
+ */
+void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
+{
+ u16 offset = hwe->irq_offset;
+ u16 instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0;
+ struct iosys_map src_offset = IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap,
+ XE_MEMIRQ_SOURCE_OFFSET(instance));
+
+ if (memirq_received(memirq, &src_offset, offset, "SRC")) {
+ struct iosys_map status_offset =
+ IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap,
+ XE_MEMIRQ_STATUS_OFFSET(instance) + offset * SZ_16);
+ memirq_dispatch_engine(memirq, &status_offset, hwe);
+ }
+}
+
+/**
* xe_memirq_handler - The `Memory Based Interrupts`_ Handler.
* @memirq: the &xe_memirq
*
@@ -405,13 +493,8 @@ void xe_memirq_handler(struct xe_memirq *memirq)
if (gt->tile != tile)
continue;
- for_each_hw_engine(hwe, gt, id) {
- if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) {
- map = IOSYS_MAP_INIT_OFFSET(&memirq->status,
- hwe->irq_offset * SZ_16);
- memirq_dispatch_engine(memirq, &map, hwe);
- }
- }
+ for_each_hw_engine(hwe, gt, id)
+ xe_memirq_hwe_handler(memirq, hwe);
}
/* GuC and media GuC (if present) must be checked separately */
diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h
index 2d40d03c3095..06130650e9d6 100644
--- a/drivers/gpu/drm/xe/xe_memirq.h
+++ b/drivers/gpu/drm/xe/xe_memirq.h
@@ -9,16 +9,18 @@
#include <linux/types.h>
struct xe_guc;
+struct xe_hw_engine;
struct xe_memirq;
int xe_memirq_init(struct xe_memirq *memirq);
-u32 xe_memirq_source_ptr(struct xe_memirq *memirq);
-u32 xe_memirq_status_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
u32 xe_memirq_enable_ptr(struct xe_memirq *memirq);
void xe_memirq_reset(struct xe_memirq *memirq);
void xe_memirq_postinstall(struct xe_memirq *memirq);
+void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
void xe_memirq_handler(struct xe_memirq *memirq);
int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h
index 625b6b8736cc..9d0f6c1cdb9d 100644
--- a/drivers/gpu/drm/xe/xe_memirq_types.h
+++ b/drivers/gpu/drm/xe/xe_memirq_types.h
@@ -11,9 +11,9 @@
struct xe_bo;
/* ISR */
-#define XE_MEMIRQ_STATUS_OFFSET 0x0
+#define XE_MEMIRQ_STATUS_OFFSET(inst) ((inst) * SZ_4K + 0x0)
/* IIR */
-#define XE_MEMIRQ_SOURCE_OFFSET 0x400
+#define XE_MEMIRQ_SOURCE_OFFSET(inst) ((inst) * SZ_4K + 0x400)
/* IMR */
#define XE_MEMIRQ_ENABLE_OFFSET 0x440
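With the offsets above now taking an engine instance, the per-instance report pages described in the MSI-X layout comment fall out of simple arithmetic: instance N gets the 4KiB page at N * SZ_4K, with the status report at the start of the page and the source report 0x400 into it. The sketch below restates the math from the __memirq_source_page()/__memirq_status_page() helpers in the xe_memirq.c hunk; the standalone function and its name are illustrative only:

#include <linux/sizes.h>
#include <linux/types.h>

/* Illustrative only: mirrors the XE_MEMIRQ_*_OFFSET(inst) usage shown above */
static u32 memirq_report_page_ggtt(u32 bo_ggtt_addr, u16 instance,
				   bool msix, bool want_source)
{
	/* Without MSI-X all engines share the instance 0 page */
	if (!msix)
		instance = 0;

	/* e.g. instance 2 -> 0x2000 (status) / 0x2400 (source) past the BO base */
	return bo_ggtt_addr + instance * SZ_4K + (want_source ? 0x400 : 0x0);
}
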
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 3fd462fda625..a48f239cad1c 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -36,13 +36,19 @@ static void tiles_fini(void *arg)
/*
* On multi-tile devices, partition the BAR space for MMIO on each tile,
* possibly accounting for register override on the number of tiles available.
+ * tile_mmio_size contains both the tile's 4MB register space and additional
+ * space for the GTT and other (possibly unused) regions.
* Resulting memory layout is like below:
*
* .----------------------. <- tile_count * tile_mmio_size
* | .... |
* |----------------------| <- 2 * tile_mmio_size
+ * | tile1 GTT + other |
+ * |----------------------| <- 1 * tile_mmio_size + 4MB
* | tile1->mmio.regs |
* |----------------------| <- 1 * tile_mmio_size
+ * | tile0 GTT + other |
+ * |----------------------| <- 4MB
* | tile0->mmio.regs |
* '----------------------' <- 0MB
*/
@@ -61,16 +67,16 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
/* Possibly override number of tile based on configuration register */
if (!xe->info.skip_mtcfg) {
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u8 tile_count;
u32 mtcfg;
/*
* Although the per-tile mmio regs are not yet initialized, this
- * is fine as it's going to the root gt, that's guaranteed to be
- * initialized earlier in xe_mmio_init()
+ * is fine as it's going to the root tile's mmio, that's
+ * guaranteed to be initialized earlier in xe_mmio_init()
*/
- mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
+ mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR);
tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
if (tile_count < xe->info.tile_count) {
@@ -90,8 +96,9 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
regs = xe->mmio.regs;
for_each_tile(tile, xe, id) {
- tile->mmio.size = tile_mmio_size;
+ tile->mmio.regs_size = SZ_4M;
tile->mmio.regs = regs;
+ tile->mmio.tile = tile;
regs += tile_mmio_size;
}
}
@@ -126,8 +133,9 @@ static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size,
regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count;
for_each_tile(tile, xe, id) {
- tile->mmio_ext.size = tile_mmio_ext_size;
+ tile->mmio_ext.regs_size = tile_mmio_ext_size;
tile->mmio_ext.regs = regs;
+ tile->mmio_ext.tile = tile;
regs += tile_mmio_ext_size;
}
}
@@ -157,137 +165,132 @@ int xe_mmio_init(struct xe_device *xe)
{
struct xe_tile *root_tile = xe_device_get_root_tile(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- const int mmio_bar = 0;
/*
* Map the entire BAR.
* The first 16MB of the BAR belong to the root tile and include:
* registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
*/
- xe->mmio.size = pci_resource_len(pdev, mmio_bar);
- xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR);
+ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
+ xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0);
if (xe->mmio.regs == NULL) {
drm_err(&xe->drm, "failed to map registers\n");
return -EIO;
}
/* Setup first tile; other tiles (if present) will be setup later. */
- root_tile->mmio.size = SZ_16M;
+ root_tile->mmio.regs_size = SZ_4M;
root_tile->mmio.regs = xe->mmio.regs;
+ root_tile->mmio.tile = root_tile;
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
-static void mmio_flush_pending_writes(struct xe_gt *gt)
+static void mmio_flush_pending_writes(struct xe_mmio *mmio)
{
#define DUMMY_REG_OFFSET 0x130030
- struct xe_tile *tile = gt_to_tile(gt);
int i;
- if (tile->xe->info.platform != XE_LUNARLAKE)
+ if (mmio->tile->xe->info.platform != XE_LUNARLAKE)
return;
/* 4 dummy writes */
for (i = 0; i < 4; i++)
- writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
+ writel(0, mmio->regs + DUMMY_REG_OFFSET);
}
-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
+u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u8 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ val = readb(mmio->regs + addr);
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u16 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ val = readw(mmio->regs + addr);
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
+void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
- trace_xe_reg_rw(gt, true, addr, val, sizeof(val));
+ trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
- if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
- xe_gt_sriov_vf_write32(gt, reg, val);
+ if (!reg.vf && mmio->sriov_vf_gt)
+ xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val);
else
- writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+ writel(val, mmio->regs + addr);
}
-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u32 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
- val = xe_gt_sriov_vf_read32(gt, reg);
+ if (!reg.vf && mmio->sriov_vf_gt)
+ val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg);
else
- val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+ val = readl(mmio->regs + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
+u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set)
{
u32 old, reg_val;
- old = xe_mmio_read32(gt, reg);
+ old = xe_mmio_read32(mmio, reg);
reg_val = (old & ~clr) | set;
- xe_mmio_write32(gt, reg, reg_val);
+ xe_mmio_write32(mmio, reg, reg_val);
return old;
}
-int xe_mmio_write32_and_verify(struct xe_gt *gt,
+int xe_mmio_write32_and_verify(struct xe_mmio *mmio,
struct xe_reg reg, u32 val, u32 mask, u32 eval)
{
u32 reg_val;
- xe_mmio_write32(gt, reg, val);
- reg_val = xe_mmio_read32(gt, reg);
+ xe_mmio_write32(mmio, reg, val);
+ reg_val = xe_mmio_read32(mmio, reg);
return (reg_val & mask) != eval ? -EINVAL : 0;
}
-bool xe_mmio_in_range(const struct xe_gt *gt,
+bool xe_mmio_in_range(const struct xe_mmio *mmio,
const struct xe_mmio_range *range,
struct xe_reg reg)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
return range && addr >= range->start && addr <= range->end;
}
/**
* xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
*
* Although Intel GPUs have some 64-bit registers, the hardware officially
@@ -307,21 +310,21 @@ bool xe_mmio_in_range(const struct xe_gt *gt,
*
* Returns the value of the 64-bit register.
*/
-u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
+u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
{
struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
u32 ldw, udw, oldudw, retries;
- reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
- reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);
+ reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr);
+ reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr);
/* we shouldn't adjust just one register address */
- xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);
+ xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4);
- oldudw = xe_mmio_read32(gt, reg_udw);
+ oldudw = xe_mmio_read32(mmio, reg_udw);
for (retries = 5; retries; --retries) {
- ldw = xe_mmio_read32(gt, reg);
- udw = xe_mmio_read32(gt, reg_udw);
+ ldw = xe_mmio_read32(mmio, reg);
+ udw = xe_mmio_read32(mmio, reg_udw);
if (udw == oldudw)
break;
@@ -329,13 +332,13 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
oldudw = udw;
}
- xe_gt_WARN(gt, retries == 0,
- "64-bit read of %#x did not stabilize\n", reg.addr);
+ drm_WARN(&mmio->tile->xe->drm, retries == 0,
+ "64-bit read of %#x did not stabilize\n", reg.addr);
return (u64)udw << 32 | ldw;
}
-static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic, bool expect_match)
{
ktime_t cur = ktime_get_raw();
@@ -346,7 +349,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
bool check;
for (;;) {
- read = xe_mmio_read32(gt, reg);
+ read = xe_mmio_read32(mmio, reg);
check = (read & mask) == val;
if (!expect_match)
@@ -372,7 +375,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
}
if (ret != 0) {
- read = xe_mmio_read32(gt, reg);
+ read = xe_mmio_read32(mmio, reg);
check = (read & mask) == val;
if (!expect_match)
@@ -390,7 +393,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
/**
* xe_mmio_wait32() - Wait for a register to match the desired masked value
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: desired value after applying the mask
@@ -407,15 +410,15 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
* @timeout_us for different reasons, especially in non-atomic contexts. Thus,
* it is possible that this function succeeds even after @timeout_us has passed.
*/
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, true);
+ return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, true);
}
/**
* xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: value not to be matched after applying the mask
@@ -426,8 +429,8 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t
* This function works exactly like xe_mmio_wait32() with the exception that
* @val is expected not to be matched.
*/
-int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, false);
+ return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false);
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 26551410ecc8..8a46f4006a84 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -14,25 +14,30 @@ struct xe_reg;
int xe_mmio_init(struct xe_device *xe);
int xe_mmio_probe_tiles(struct xe_device *xe);
-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);
-u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg);
-void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg);
-u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
-int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
-bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
-
-u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic);
-int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic);
-
-static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr)
+u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg);
+u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg);
+void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val);
+u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg);
+u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set);
+int xe_mmio_write32_and_verify(struct xe_mmio *mmio, struct xe_reg reg, u32 val, u32 mask, u32 eval);
+bool xe_mmio_in_range(const struct xe_mmio *mmio, const struct xe_mmio_range *range, struct xe_reg reg);
+
+u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg);
+int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val,
+ u32 timeout_us, u32 *out_val, bool atomic);
+int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask,
+ u32 val, u32 timeout_us, u32 *out_val, bool atomic);
+
+static inline u32 xe_mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr)
{
- if (addr < gt->mmio.adj_limit)
- addr += gt->mmio.adj_offset;
+ if (addr < mmio->adj_limit)
+ addr += mmio->adj_offset;
return addr;
}
+static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
+{
+ return &xe->tiles[0].mmio;
+}
+
#endif
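The xe_mmio.c/xe_mmio.h changes above switch every accessor from struct xe_gt * to struct xe_mmio *, and the rest of the series updates callers accordingly: GT code passes &gt->mmio, tile-scoped code (IRQ banks, LMTT) passes &tile->mmio, and device-level paths that previously went through xe_root_mmio_gt() use xe_root_tile_mmio(). A minimal caller-side sketch, using only accessors and registers that appear in the hunks of this diff (the function name and the final drm_dbg() are made up):

static void example_mmio_users(struct xe_device *xe, struct xe_tile *tile,
			       struct xe_gt *gt)
{
	/* GT-scoped registers: the GT now embeds its own struct xe_mmio */
	u32 mocs = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(0));

	/* Tile-scoped registers (IRQ banks, LMTT, ...) use the tile's xe_mmio */
	xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, ~0);

	/* Device-level paths that used xe_root_mmio_gt() now take the root tile's mmio */
	u64 mtcfg = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), XEHP_MTCFG_ADDR);

	drm_dbg(&xe->drm, "mocs %#x mtcfg %#llx\n", mocs, mtcfg);
}
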
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 7ff0ac5b799a..54d199b5cfb2 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -278,7 +278,7 @@ static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct d
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
j++,
@@ -310,7 +310,7 @@ static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n",
i,
@@ -383,7 +383,7 @@ static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
j++,
@@ -428,7 +428,7 @@ static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n",
j++,
@@ -510,7 +510,7 @@ static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n",
i,
@@ -553,7 +553,7 @@ static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u] (%#8x)\n",
i,
@@ -576,6 +576,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
memset(info, 0, sizeof(struct xe_mocs_info));
switch (xe->info.platform) {
+ case XE_PANTHERLAKE:
case XE_LUNARLAKE:
case XE_BATTLEMAGE:
info->ops = &xe2_mocs_ops;
@@ -690,7 +691,7 @@ static void __init_mocs_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
else
- xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
+ xe_mmio_write32(&gt->mmio, XELP_GLOBAL_MOCS(i), mocs);
}
}
@@ -730,7 +731,7 @@ static void init_l3cc_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
else
- xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
+ xe_mmio_write32(&gt->mmio, XELP_LNCFCMOCS(i), l3cc);
}
}
@@ -773,25 +774,21 @@ void xe_mocs_init(struct xe_gt *gt)
void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_mocs_info table;
- unsigned int flags;
- u32 ret;
struct xe_device *xe = gt_to_xe(gt);
+ struct xe_mocs_info table;
+ unsigned int fw_ref, flags;
flags = get_mocs_settings(xe, &table);
xe_pm_runtime_get_noresume(xe);
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-
- if (ret)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
goto err_fw;
table.ops->dump(&table, flags, gt, p);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_fw:
- xe_assert(xe, !ret);
xe_pm_runtime_put(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 78823f53d290..8dd55798ab31 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -36,11 +36,22 @@
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
+#include "xe_sync.h"
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX
+enum xe_oa_submit_deps {
+ XE_OA_SUBMIT_NO_DEPS,
+ XE_OA_SUBMIT_ADD_DEPS,
+};
+
+enum xe_oa_user_extn_from {
+ XE_OA_USER_EXTN_FROM_OPEN,
+ XE_OA_USER_EXTN_FROM_CONFIG,
+};
+
struct xe_oa_reg {
struct xe_reg addr;
u32 value;
@@ -70,6 +81,7 @@ struct flex {
};
struct xe_oa_open_param {
+ struct xe_file *xef;
u32 oa_unit_id;
bool sample;
u32 metric_set;
@@ -81,6 +93,9 @@ struct xe_oa_open_param {
struct xe_exec_queue *exec_q;
struct xe_hw_engine *hwe;
bool no_preempt;
+ struct drm_xe_sync __user *syncs_user;
+ int num_syncs;
+ struct xe_sync_entry *syncs;
};
struct xe_oa_config_bo {
@@ -90,6 +105,17 @@ struct xe_oa_config_bo {
struct xe_bb *bb;
};
+struct xe_oa_fence {
+ /* @base: dma fence base */
+ struct dma_fence base;
+ /* @lock: lock for the fence */
+ spinlock_t lock;
+ /* @work: work to signal @base */
+ struct delayed_work work;
+ /* @cb: callback to schedule @work */
+ struct dma_fence_cb cb;
+};
+
#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
static const struct xe_oa_format oa_formats[] = {
@@ -162,10 +188,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
return oa_config;
}
-static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
{
xe_oa_config_put(oa_bo->oa_config);
- xe_bb_free(oa_bo->bb, NULL);
+ xe_bb_free(oa_bo->bb, last_fence);
kfree(oa_bo);
}
@@ -176,7 +202,7 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
{
- return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) &
+ return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) &
OAG_OATAILPTR_MASK;
}
@@ -366,7 +392,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, oaheadptr,
+ xe_mmio_write32(&stream->gt->mmio, oaheadptr,
(head + gtt_offset) & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -377,22 +403,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
unsigned long flags;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
*/
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointer to read from */
@@ -444,21 +471,23 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
- xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
+ xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val);
}
static void xe_oa_disable(struct xe_oa_stream *stream)
{
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0);
- if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl,
+ struct xe_mmio *mmio = &stream->gt->mmio;
+
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0);
+ if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA to be disabled timed out\n");
if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
/* <= XE_METEORLAKE except XE_PVC */
- xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
- if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
+ xe_mmio_write32(mmio, OA_TLB_INV_CR, 1);
+ if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA tlb invalidate timed out\n");
}
@@ -481,7 +510,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
/* Only clear our bits to avoid side-effects */
- stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
+ stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
OASTATUS_RELEVANT_BITS, 0);
/*
* Signal to userspace that there is non-zero OA status to read via
@@ -567,11 +596,11 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
return ret;
}
-static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
+ struct xe_bb *bb)
{
struct xe_sched_job *job;
struct dma_fence *fence;
- long timeout;
int err = 0;
/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
@@ -581,18 +610,24 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
goto exit;
}
+ if (deps == XE_OA_SUBMIT_ADD_DEPS) {
+ for (int i = 0; i < stream->num_syncs && !err; i++)
+ err = xe_sync_entry_add_deps(&stream->syncs[i], job);
+ if (err) {
+ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
+ goto err_put_job;
+ }
+ }
+
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
- timeout = dma_fence_wait_timeout(fence, false, HZ);
- dma_fence_put(fence);
- if (timeout < 0)
- err = timeout;
- else if (!timeout)
- err = -ETIME;
+ return fence;
+err_put_job:
+ xe_sched_job_put(job);
exit:
- return err;
+ return ERR_PTR(err);
}
static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
@@ -636,7 +671,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
xe_oa_config_put(stream->oa_config);
llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
- free_oa_config_bo(oa_bo);
+ free_oa_config_bo(oa_bo, stream->last_fence);
+ dma_fence_put(stream->last_fence);
}
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
@@ -656,6 +692,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
const struct flex *flex, u32 count)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -667,7 +704,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
xe_oa_store_flex(stream, lrc, bb, flex, count);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -675,6 +721,7 @@ exit:
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -686,7 +733,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
write_cs_mi_lri(bb, reg_lri, 1);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -749,7 +805,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
int err;
/* Set ccs select to enable programming of OAC_OACONTROL */
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
+ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
+ __oa_ccs_select(stream));
/* Modify stream hwe context image with regs_context */
err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
@@ -785,6 +842,7 @@ static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool en
static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 sqcnt1;
/*
@@ -798,7 +856,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
}
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
oag_configure_mmio_trigger(stream, false));
/* disable the context save/restore or OAR counters */
@@ -806,13 +864,13 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
xe_oa_configure_oa_context(stream, false);
/* Make sure we disable noa to save power. */
- xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
+ xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
/* Reset PMON Enable to save power. */
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0);
}
static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
@@ -832,7 +890,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_oa_free_oa_buffer(stream);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(stream->oa->xe);
/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
@@ -840,6 +898,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
xe_oa_free_configs(stream);
+ xe_file_put(stream->xef);
}
static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
@@ -910,11 +969,62 @@ out:
return oa_bo;
}
+static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
+{
+ dma_fence_put(stream->last_fence);
+ stream->last_fence = dma_fence_get(fence);
+}
+
+static void xe_oa_fence_work_fn(struct work_struct *w)
+{
+ struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
+
+ /* Signal fence to indicate new OA configuration is active */
+ dma_fence_signal(&ofence->base);
+ dma_fence_put(&ofence->base);
+}
+
+static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ /* Additional empirical delay needed for NOA programming after registers are written */
+#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
+
+ struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
+
+ INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
+ queue_delayed_work(system_unbound_wq, &ofence->work,
+ usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
+ dma_fence_put(fence);
+}
+
+static const char *xe_oa_get_driver_name(struct dma_fence *fence)
+{
+ return "xe_oa";
+}
+
+static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static const struct dma_fence_ops xe_oa_fence_ops = {
+ .get_driver_name = xe_oa_get_driver_name,
+ .get_timeline_name = xe_oa_get_timeline_name,
+};
+
static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
{
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
struct xe_oa_config_bo *oa_bo;
- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
+ struct xe_oa_fence *ofence;
+ int i, err, num_signal = 0;
+ struct dma_fence *fence;
+
+ ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
+ if (!ofence) {
+ err = -ENOMEM;
+ goto exit;
+ }
oa_bo = xe_oa_alloc_config_buffer(stream, config);
if (IS_ERR(oa_bo)) {
@@ -922,11 +1032,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
goto exit;
}
- err = xe_oa_submit_bb(stream, oa_bo->bb);
+ /* Emit OA configuration batch */
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto exit;
+ }
- /* Additional empirical delay needed for NOA programming after registers are written */
- usleep_range(us, 2 * us);
+ /* Point of no return: initialize and set fence to signal */
+ spin_lock_init(&ofence->lock);
+ dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
+
+ for (i = 0; i < stream->num_syncs; i++) {
+ if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
+ num_signal++;
+ xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
+ }
+
+ /* Additional dma_fence_get in case we dma_fence_wait */
+ if (!num_signal)
+ dma_fence_get(&ofence->base);
+
+ /* Update last fence too before adding callback */
+ xe_oa_update_last_fence(stream, fence);
+
+ /* Add job fence callback to schedule work to signal ofence->base */
+ err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
+ xe_gt_assert(stream->gt, !err || err == -ENOENT);
+ if (err == -ENOENT)
+ xe_oa_config_cb(fence, &ofence->cb);
+
+ /* If nothing needs to be signaled we wait synchronously */
+ if (!num_signal) {
+ dma_fence_wait(&ofence->base, false);
+ dma_fence_put(&ofence->base);
+ }
+
+ /* Done with syncs */
+ for (i = 0; i < stream->num_syncs; i++)
+ xe_sync_entry_cleanup(&stream->syncs[i]);
+ kfree(stream->syncs);
+
+ return 0;
exit:
+ kfree(ofence);
return err;
}
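
The hunk above replaces the old usleep_range() settle delay with a software dma-fence: the fence of the submitted configuration batch gets a callback, the callback queues a delayed work item, and that work item signals the new fence roughly 500 us later, so waiters observe "configuration active" rather than merely "batch completed". Below is a minimal, self-contained sketch of that pattern under the same assumptions; cfg_fence, cfg_fence_arm() and CFG_EXTRA_DELAY_US are illustrative names, not the driver's.

#include <linux/dma-fence.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

/* base must stay the first member so the default fence release frees the whole object */
struct cfg_fence {
	struct dma_fence base;
	spinlock_t lock;
	struct dma_fence_cb cb;		/* hooked onto the submitted job's fence */
	struct delayed_work work;
};

#define CFG_EXTRA_DELAY_US 500		/* empirical settle time, as in the patch */

static const char *cfg_fence_driver_name(struct dma_fence *f)
{
	return "cfg-sketch";
}

static const char *cfg_fence_timeline_name(struct dma_fence *f)
{
	return "unbound";
}

static const struct dma_fence_ops cfg_fence_ops = {
	.get_driver_name = cfg_fence_driver_name,
	.get_timeline_name = cfg_fence_timeline_name,
};

static void cfg_fence_work(struct work_struct *w)
{
	struct cfg_fence *cf = container_of(w, struct cfg_fence, work.work);

	dma_fence_signal(&cf->base);	/* the new configuration is now active */
	dma_fence_put(&cf->base);	/* drop the initial reference */
}

static void cfg_fence_job_done(struct dma_fence *job_fence, struct dma_fence_cb *cb)
{
	struct cfg_fence *cf = container_of(cb, struct cfg_fence, cb);

	INIT_DELAYED_WORK(&cf->work, cfg_fence_work);
	queue_delayed_work(system_unbound_wq, &cf->work,
			   usecs_to_jiffies(CFG_EXTRA_DELAY_US));
	dma_fence_put(job_fence);	/* consume the caller's job fence reference */
}

/* Arm the software fence against the just-submitted batch; takes ownership of job_fence. */
static void cfg_fence_arm(struct cfg_fence *cf, struct dma_fence *job_fence)
{
	spin_lock_init(&cf->lock);
	dma_fence_init(&cf->base, &cfg_fence_ops, &cf->lock, 0, 0);

	/* -ENOENT means the job fence already signalled: run the callback directly */
	if (dma_fence_add_callback(job_fence, &cf->cb, cfg_fence_job_done) == -ENOENT)
		cfg_fence_job_done(job_fence, &cf->cb);
}

Any caller that wants to dma_fence_wait() on the software fence needs its own extra dma_fence_get() before arming, which is what the patch does for the synchronous (no-signal-sync) case.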
@@ -940,6 +1089,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 oa_debug, sqcnt1;
int ret;
@@ -966,12 +1116,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
_MASKED_BIT_ENABLE(oa_debug) |
oag_report_ctx_switches(stream) |
oag_configure_mmio_trigger(stream, true));
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
OAG_OAGLBCTXCTRL_TIMER_ENABLE |
REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
@@ -985,7 +1135,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1);
/* Configure OAR/OAC */
if (stream->exec_q) {
@@ -997,6 +1147,262 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
return xe_oa_emit_oa_config(stream, stream->oa_config);
}
+static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
+{
+ u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
+ u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
+ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
+ u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
+ int idx;
+
+ for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
+ const struct xe_oa_format *f = &oa->oa_formats[idx];
+
+ if (counter_size == f->counter_size && bc_report == f->bc_report &&
+ type == f->type && counter_sel == f->counter_select) {
+ *name = idx;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ if (value >= oa->oa_unit_ids) {
+ drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
+ return -EINVAL;
+ }
+ param->oa_unit_id = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->sample = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->metric_set = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ int ret = decode_oa_format(oa, value, &param->oa_format);
+
+ if (ret) {
+ drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
+ return ret;
+ }
+ return 0;
+}
+
+static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+#define OA_EXPONENT_MAX 31
+
+ if (value > OA_EXPONENT_MAX) {
+ drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
+ return -EINVAL;
+ }
+ param->period_exponent = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->disabled = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->exec_queue_id = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->engine_instance = value;
+ return 0;
+}
+
+static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->no_preempt = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->num_syncs = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->syncs_user = u64_to_user_ptr(value);
+ return 0;
+}
+
+static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ return -EINVAL;
+}
+
+typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param);
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_set_property ext;
+ int err;
+ u32 idx;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) !=
+ ARRAY_SIZE(xe_oa_set_property_funcs_config));
+
+ if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
+ XE_IOCTL_DBG(oa->xe, ext.pad))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));
+
+ if (from == XE_OA_USER_EXTN_FROM_CONFIG)
+ return xe_oa_set_property_funcs_config[idx](oa, ext.value, param);
+ else
+ return xe_oa_set_property_funcs_open[idx](oa, ext.value, param);
+}
+
+typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param);
+static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
+ [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 16
+static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension,
+ int ext_number, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_user_extension ext;
+ int err;
+ u32 idx;
+
+ if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
+ XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
+ err = xe_oa_user_extension_funcs[idx](oa, from, extension, param);
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param);
+
+ return 0;
+}
+
+static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
+{
+ int ret, num_syncs, num_ufence = 0;
+
+ if (param->num_syncs && !param->syncs_user) {
+ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (param->num_syncs) {
+ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
+ if (!param->syncs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ }
+
+ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
+ ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
+ &param->syncs_user[num_syncs], 0);
+ if (ret)
+ goto err_syncs;
+
+ if (xe_sync_is_ufence(&param->syncs[num_syncs]))
+ num_ufence++;
+ }
+
+ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
+ ret = -EINVAL;
+ goto err_syncs;
+ }
+
+ return 0;
+
+err_syncs:
+ while (num_syncs--)
+ xe_sync_entry_cleanup(&param->syncs[num_syncs]);
+ kfree(param->syncs);
+exit:
+ return ret;
+}
+
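
The pair of property tables added above gives the open and reconfigure paths different views of the same property namespace (the config table maps most entries to xe_oa_set_prop_ret_inval), while xe_oa_user_extensions() walks a user-supplied chain of extension blocks with bounded recursion and a speculation-safe table lookup. A compact sketch of that general pattern follows; parse_state, prop_table and walk_extensions() are illustrative names, not the driver's.

#include <linux/kernel.h>
#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <uapi/drm/xe_drm.h>

struct parse_state { u64 metric_set; };		/* illustrative */

typedef int (*prop_fn)(u64 value, struct parse_state *s);

static int set_metric_set(u64 value, struct parse_state *s)
{
	s->metric_set = value;
	return 0;
}

static int reject_prop(u64 value, struct parse_state *s)
{
	return -EINVAL;		/* property not valid in this context */
}

/* Indexed by DRM_XE_OA_PROPERTY_*; unlisted entries stay NULL */
static const prop_fn prop_table[] = {
	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = set_metric_set,
	[DRM_XE_OA_PROPERTY_OA_FORMAT] = reject_prop,
};

#define MAX_EXTS 16

static int walk_extensions(u64 uptr, int depth, struct parse_state *s)
{
	struct drm_xe_user_extension ext;
	struct drm_xe_ext_set_property prop;
	u32 idx;
	int err;

	if (depth >= MAX_EXTS)
		return -E2BIG;

	if (copy_from_user(&ext, u64_to_user_ptr(uptr), sizeof(ext)))
		return -EFAULT;
	if (ext.pad || ext.name != DRM_XE_OA_EXTENSION_SET_PROPERTY)
		return -EINVAL;

	/* base is the first member, so re-read the full set_property block */
	if (copy_from_user(&prop, u64_to_user_ptr(uptr), sizeof(prop)))
		return -EFAULT;
	if (prop.pad || prop.property >= ARRAY_SIZE(prop_table))
		return -EINVAL;

	/* clamp the index against speculative out-of-bounds use */
	idx = array_index_nospec(prop.property, ARRAY_SIZE(prop_table));
	if (!prop_table[idx])
		return -EINVAL;
	err = prop_table[idx](prop.value, s);
	if (err)
		return err;

	return ext.next_extension ?
		walk_extensions(ext.next_extension, depth + 1, s) : 0;
}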
static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
stream->pollin = false;
@@ -1090,36 +1496,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream)
static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
- struct drm_xe_ext_set_property ext;
+ struct xe_oa_open_param param = {};
long ret = stream->oa_config->id;
struct xe_oa_config *config;
int err;
- err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext));
- if (XE_IOCTL_DBG(stream->oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET))
- return -EINVAL;
+ err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param);
+ if (err)
+ return err;
- config = xe_oa_get_oa_config(stream->oa, ext.value);
+ config = xe_oa_get_oa_config(stream->oa, param.metric_set);
if (!config)
return -ENODEV;
- if (config != stream->oa_config) {
- err = xe_oa_emit_oa_config(stream, config);
- if (!err)
- config = xchg(&stream->oa_config, config);
- else
- ret = err;
+ param.xef = stream->xef;
+ err = xe_oa_parse_syncs(stream->oa, &param);
+ if (err)
+ goto err_config_put;
+
+ stream->num_syncs = param.num_syncs;
+ stream->syncs = param.syncs;
+
+ err = xe_oa_emit_oa_config(stream, config);
+ if (!err) {
+ config = xchg(&stream->oa_config, config);
+ drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n",
+ stream->oa_config->uuid);
}
+err_config_put:
xe_oa_config_put(config);
- return ret;
+ return err ?: ret;
}
static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
@@ -1349,6 +1757,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
{
struct xe_oa_unit *u = param->hwe->oa_unit;
struct xe_gt *gt = param->hwe->gt;
+ unsigned int fw_ref;
int ret;
stream->exec_q = param->exec_q;
@@ -1362,6 +1771,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
+ stream->xef = xe_file_get(param->xef);
+ stream->num_syncs = param->num_syncs;
+ stream->syncs = param->syncs;
+
/*
* For Xe2+, when overrun mode is enabled, there are no partial reports at the end
* of buffer, making the OA buffer effectively a non-power-of-2 size circular
@@ -1409,7 +1822,11 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(stream->oa->xe);
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto err_fw_put;
+ }
ret = xe_oa_alloc_oa_buffer(stream);
if (ret)
@@ -1451,13 +1868,14 @@ err_put_k_exec_q:
err_free_oa_buf:
xe_oa_free_oa_buffer(stream);
err_fw_put:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(stream->oa->xe);
if (stream->override_gucrc)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
xe_oa_free_configs(stream);
exit:
+ xe_file_put(stream->xef);
return ret;
}
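
Several hunks in this series (here and in xe_pat.c, xe_query.c and xe_reg_sr.c below) move from the old int-returning xe_force_wake_get()/XE_WARN_ON(xe_force_wake_put()) pairing to a reference cookie: get() returns the mask of domains actually woken, callers that need every domain test it with xe_force_wake_ref_has_domain(), and the same cookie is always handed back to put() so partially acquired domains are still released. A minimal usage sketch following the signatures visible in this diff (it assumes the driver's own xe_force_wake.h/xe_gt.h helpers):

#include "xe_force_wake.h"
#include "xe_gt.h"

static int demo_with_forcewake(struct xe_gt *gt)
{
	unsigned int fw_ref;

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		/* a partial wake still holds domains; release what we got */
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
		return -ETIMEDOUT;
	}

	/* ... MMIO access that needs the GT awake ... */

	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	return 0;
}

The error path mirrors query_engine_cycles() further down: even on a partial wake the reference must be put before bailing out.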
@@ -1535,7 +1953,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
case XE_PVC:
case XE_METEORLAKE:
xe_pm_runtime_get(gt_to_xe(gt));
- reg = xe_mmio_read32(gt, RPM_CONFIG0);
+ reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
xe_pm_runtime_put(gt_to_xe(gt));
shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
@@ -1567,27 +1985,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
}
}
-static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
-{
- u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
- u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
- u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
- u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
- int idx;
-
- for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
- const struct xe_oa_format *f = &oa->oa_formats[idx];
-
- if (counter_size == f->counter_size && bc_report == f->bc_report &&
- type == f->type && counter_sel == f->counter_select) {
- *name = idx;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
/**
* xe_oa_unit_id - Return OA unit ID for a hardware engine
* @hwe: @xe_hw_engine
@@ -1634,155 +2031,6 @@ out:
return ret;
}
-static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- if (value >= oa->oa_unit_ids) {
- drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
- return -EINVAL;
- }
- param->oa_unit_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->sample = value;
- return 0;
-}
-
-static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->metric_set = value;
- return 0;
-}
-
-static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- int ret = decode_oa_format(oa, value, &param->oa_format);
-
- if (ret) {
- drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
- return ret;
- }
- return 0;
-}
-
-static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
-#define OA_EXPONENT_MAX 31
-
- if (value > OA_EXPONENT_MAX) {
- drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
- return -EINVAL;
- }
- param->period_exponent = value;
- return 0;
-}
-
-static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->disabled = value;
- return 0;
-}
-
-static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->exec_queue_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->engine_instance = value;
- return 0;
-}
-
-static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->no_preempt = value;
- return 0;
-}
-
-typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param);
-static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
- [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
- [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
- [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
- [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
- [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
- [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
- [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
- [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
- [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
-};
-
-static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_ext_set_property ext;
- int err;
- u32 idx;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
- XE_IOCTL_DBG(oa->xe, ext.pad))
- return -EINVAL;
-
- idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
- return xe_oa_set_property_funcs[idx](oa, ext.value, param);
-}
-
-typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param);
-static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
- [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
-};
-
-#define MAX_USER_EXTENSIONS 16
-static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_user_extension ext;
- int err;
- u32 idx;
-
- if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
- return -E2BIG;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
- XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
- return -EINVAL;
-
- idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
- err = xe_oa_user_extension_funcs[idx](oa, extension, param);
- if (XE_IOCTL_DBG(oa->xe, err))
- return err;
-
- if (ext.next_extension)
- return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param);
-
- return 0;
-}
-
/**
* xe_oa_stream_open_ioctl - Opens an OA stream
* @dev: @drm_device
@@ -1808,7 +2056,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
return -ENODEV;
}
- ret = xe_oa_user_extensions(oa, data, 0, &param);
+ param.xef = xef;
+ ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
if (ret)
return ret;
@@ -1876,11 +2125,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
}
+ ret = xe_oa_parse_syncs(oa, &param);
+ if (ret)
+ goto err_exec_q;
+
mutex_lock(&param.hwe->gt->oa.gt_lock);
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
mutex_unlock(&param.hwe->gt->oa.gt_lock);
+ if (ret < 0)
+ goto err_sync_cleanup;
+
+ return ret;
+
+err_sync_cleanup:
+ while (param.num_syncs--)
+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
+ kfree(param.syncs);
err_exec_q:
- if (ret < 0 && param.exec_q)
+ if (param.exec_q)
xe_exec_queue_put(param.exec_q);
return ret;
}
@@ -2351,7 +2613,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt)
}
/* Ensure MMIO trigger remains disabled till there is a stream */
- xe_mmio_write32(gt, u->regs.oa_debug,
+ xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
oag_configure_mmio_trigger(NULL, false));
/* Set oa_unit_ids now to ensure ids remain contiguous */
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8862eca73fbe..fea9d981e414 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -238,5 +238,17 @@ struct xe_oa_stream {
/** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
u32 no_preempt;
+
+ /** @xef: xe_file with which the stream was opened */
+ struct xe_file *xef;
+
+ /** @last_fence: fence to use in stream destroy when needed */
+ struct dma_fence *last_fence;
+
+ /** @num_syncs: size of @syncs array */
+ u32 num_syncs;
+
+ /** @syncs: syncs to wait on and to signal */
+ struct xe_sync_entry *syncs;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index f291a1730024..30fdbdb9341e 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -100,6 +100,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
* Reserved entries should be programmed with the maximum caching, minimum
* coherency (which matches an all-0's encoding), so we can just omit them
* in the table.
+ *
+ * Note: There is an implicit assumption in the driver that compression and
+ * coh_1way+ are mutually exclusive. If that ever stops being true, userptr
+ * and dma-bufs imported from external devices will have uncleared CCS state.
*/
#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
{ \
@@ -109,7 +113,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
- .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+ .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \
+ XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
}
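
The .coh_mode initializer above folds a compile-time check into the expression: BUILD_BUG_ON_ZERO() evaluates to 0 when its condition is false and breaks the build otherwise, so a table entry that requests both compression and 1-way+ coherency can never slip in silently. A small standalone sketch of the same trick, with illustrative DEMO_* names:

#include <linux/bits.h>
#include <linux/build_bug.h>

#define DEMO_COMPRESSED	BIT(0)
#define DEMO_COH_1WAY	BIT(1)

struct demo_mode_entry {
	unsigned int flags;
	unsigned int coh_mode;
};

/* entries claiming both compression and coherency refuse to compile */
#define DEMO_MODE_ENTRY(_flags)						\
	{								\
		.flags = (_flags),					\
		.coh_mode = (BUILD_BUG_ON_ZERO(((_flags) & DEMO_COMPRESSED) && \
					       ((_flags) & DEMO_COH_1WAY)) ||	\
			     ((_flags) & DEMO_COH_1WAY)) ? 1 : 0,	\
	}

static const struct demo_mode_entry demo_table[] = {
	DEMO_MODE_ENTRY(DEMO_COMPRESSED),		/* ok */
	DEMO_MODE_ENTRY(DEMO_COH_1WAY),			/* ok */
	/* DEMO_MODE_ENTRY(DEMO_COMPRESSED | DEMO_COH_1WAY) -> build error */
};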
static const struct xe_pat_table_entry xe2_pat_table[] = {
@@ -160,7 +165,7 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[
for (int i = 0; i < n_entries; i++) {
struct xe_reg reg = XE_REG(_PAT_INDEX(i));
- xe_mmio_write32(gt, reg, table[i].value);
+ xe_mmio_write32(&gt->mmio, reg, table[i].value);
}
}
@@ -177,25 +182,24 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta
static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
- u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ u32 pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
XELP_MEM_TYPE_STR_MAP[mem_type], pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xelp_pat_ops = {
@@ -206,11 +210,12 @@ static const struct xe_pat_ops xelp_pat_ops = {
static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -224,9 +229,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
XELP_MEM_TYPE_STR_MAP[mem_type], pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xehp_pat_ops = {
@@ -237,11 +240,12 @@ static const struct xe_pat_ops xehp_pat_ops = {
static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -253,9 +257,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xehpc_pat_ops = {
@@ -266,11 +268,12 @@ static const struct xe_pat_ops xehpc_pat_ops = {
static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -278,7 +281,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
u32 pat;
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
@@ -287,9 +290,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/*
@@ -316,27 +317,28 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
int n_entries)
{
program_pat(gt, table, n_entries);
- xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value);
if (IS_DGFX(gt_to_xe(gt)))
- xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value);
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value);
}
static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
u32 pat;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
@@ -355,7 +357,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
* PPGTT entries.
*/
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_PTA));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
@@ -369,9 +371,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XE2_COH_MODE, pat),
pat);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xe2_pat_ops = {
@@ -382,7 +382,7 @@ static const struct xe_pat_ops xe2_pat_ops = {
void xe_pat_init_early(struct xe_device *xe)
{
- if (GRAPHICS_VER(xe) == 20) {
+ if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 5e962e72c97e..e6640283893f 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -13,7 +13,7 @@
#include <drm/drm_color_mgmt.h>
#include <drm/drm_drv.h>
-#include <drm/intel/xe_pciids.h>
+#include <drm/intel/pciids.h>
#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
@@ -103,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpp = {
#define XE_HP_FEATURES \
.has_range_tlb_invalidation = true, \
- .has_flat_ccs = true, \
.dma_mask_size = 46, \
.va_bits = 48, \
.vm_max_level = 3
@@ -120,6 +119,8 @@ static const struct xe_graphics_desc graphics_xehpg = {
XE_HP_FEATURES,
.vram_flags = XE_VRAM_FLAGS_NEED64K,
+
+ .has_flat_ccs = 1,
};
static const struct xe_graphics_desc graphics_xehpc = {
@@ -145,7 +146,6 @@ static const struct xe_graphics_desc graphics_xehpc = {
.has_asid = 1,
.has_atomic_enable_pte_bit = 1,
- .has_flat_ccs = 0,
.has_usm = 1,
};
@@ -156,7 +156,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
BIT(XE_HW_ENGINE_CCS0),
XE_HP_FEATURES,
- .has_flat_ccs = 0,
};
#define XE2_GFX_FEATURES \
@@ -209,7 +208,7 @@ static const struct xe_media_desc media_xelpmp = {
};
static const struct xe_media_desc media_xe2 = {
- .name = "Xe2_LPM / Xe2_HPM",
+ .name = "Xe2_LPM / Xe2_HPM / Xe3_LPM",
.hw_engine_mask =
GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) |
@@ -234,7 +233,7 @@ static const struct xe_device_desc rkl_desc = {
.require_force_probe = true,
};
-static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 };
+static const u16 adls_rpls_ids[] = { INTEL_RPLS_IDS(NOP), 0 };
static const struct xe_device_desc adl_s_desc = {
.graphics = &graphics_xelp,
@@ -249,7 +248,7 @@ static const struct xe_device_desc adl_s_desc = {
},
};
-static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+static const u16 adlp_rplu_ids[] = { INTEL_RPLU_IDS(NOP), 0 };
static const struct xe_device_desc adl_p_desc = {
.graphics = &graphics_xelp,
@@ -286,9 +285,9 @@ static const struct xe_device_desc dg1_desc = {
.require_force_probe = true,
};
-static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
-static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
-static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+static const u16 dg2_g10_ids[] = { INTEL_DG2_G10_IDS(NOP), INTEL_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { INTEL_DG2_G11_IDS(NOP), INTEL_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 };
#define DG2_FEATURES \
DGFX_FEATURES, \
@@ -347,6 +346,12 @@ static const struct xe_device_desc bmg_desc = {
.has_heci_cscfi = 1,
};
+static const struct xe_device_desc ptl_desc = {
+ PLATFORM(PANTHERLAKE),
+ .has_display = true,
+ .require_force_probe = true,
+};
+
#undef PLATFORM
__diag_pop();
@@ -357,6 +362,8 @@ static const struct gmdid_map graphics_ip_map[] = {
{ 1274, &graphics_xelpg }, /* Xe_LPG+ */
{ 2001, &graphics_xe2 },
{ 2004, &graphics_xe2 },
+ { 3000, &graphics_xe2 },
+ { 3001, &graphics_xe2 },
};
/* Map of GMD_ID values to media IP */
@@ -364,13 +371,9 @@ static const struct gmdid_map media_ip_map[] = {
{ 1300, &media_xelpmp },
{ 1301, &media_xe2 },
{ 2000, &media_xe2 },
+ { 3000, &media_xe2 },
};
-#define INTEL_VGA_DEVICE(id, info) { \
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \
- PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \
- (unsigned long) info }
-
/*
* Make sure any device matches here are from most specific to most
* general. For example, since the Quanta match is based on the subsystem
@@ -378,25 +381,26 @@ static const struct gmdid_map media_ip_map[] = {
* PCI ID matches, otherwise we'll use the wrong info struct above.
*/
static const struct pci_device_id pciidlist[] = {
- XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
- XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
- XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
- XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
- XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
- XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
- XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
- XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
- XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
- XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
- XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
- XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
- XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc),
+ INTEL_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+ INTEL_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
+ INTEL_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+ INTEL_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ INTEL_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
+ INTEL_RPLU_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ INTEL_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ INTEL_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+ INTEL_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+ INTEL_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+ INTEL_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+ INTEL_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+ INTEL_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+ INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
+ INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc),
+ INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc),
{ }
};
MODULE_DEVICE_TABLE(pci, pciidlist);
-#undef INTEL_VGA_DEVICE
-
/* is device_id present in comma separated list of ids */
static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
{
@@ -467,13 +471,15 @@ enum xe_gmdid_type {
static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_reg gmdid_reg = GMD_ID;
u32 val;
KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
if (IS_SRIOV_VF(xe)) {
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
/*
* To get the value of the GMDID register, VFs must obtain it
* from the GuC using MMIO communication.
@@ -509,14 +515,17 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
gt->info.type = XE_GT_TYPE_UNINITIALIZED;
} else {
/*
- * We need to apply the GSI offset explicitly here as at this
- * point the xe_gt is not fully uninitialized and only basic
- * access to MMIO registers is possible.
+ * GMD_ID is a GT register, but at this point in the driver
+ * init we haven't fully initialized the GT yet so we need to
+ * read the register with the tile's MMIO accessor. That means
+ * we need to apply the GSI offset manually since it won't get
+ * automatically added as it would if we were using a GT mmio
+ * accessor.
*/
if (type == GMDID_MEDIA)
gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
- val = xe_mmio_read32(gt, gmdid_reg);
+ val = xe_mmio_read32(mmio, gmdid_reg);
}
*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
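
read_gmdid() returns arch * 100 + release, which is exactly the key used by the graphics_ip_map/media_ip_map tables earlier in this file; an ARCH field of 30 with RELEASE 0 or 1 therefore yields 3000/3001 and lands on the new Xe3 entries added above. A hedged sketch of the decode; the DEMO_* masks are illustrative stand-ins for the real GMD_ID field definitions:

#include <linux/bitfield.h>
#include <linux/bits.h>

/* illustrative layout only; the driver's GMD_ID_*_MASK definitions are authoritative */
#define DEMO_GMD_ARCH_MASK	GENMASK(31, 22)
#define DEMO_GMD_RELEASE_MASK	GENMASK(21, 14)

static u32 demo_gmdid_to_ver(u32 val)
{
	/* e.g. arch 30, release 0 -> 3000 -> graphics_xe2 in the map above */
	return FIELD_GET(DEMO_GMD_ARCH_MASK, val) * 100 +
	       FIELD_GET(DEMO_GMD_RELEASE_MASK, val);
}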
@@ -678,7 +687,10 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit;
if (xe->info.platform != XE_PVC)
xe->info.has_device_atomics_on_smem = 1;
+
+ /* Runtime detection may change this later */
xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
+
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
@@ -707,6 +719,7 @@ static int xe_info_init(struct xe_device *xe,
gt->info.type = XE_GT_TYPE_MAIN;
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
gt->info.engine_mask = graphics_desc->hw_engine_mask;
+
if (MEDIA_VER(xe) < 13 && media_desc)
gt->info.engine_mask |= media_desc->hw_engine_mask;
@@ -725,8 +738,6 @@ static int xe_info_init(struct xe_device *xe,
gt->info.type = XE_GT_TYPE_MEDIA;
gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
gt->info.engine_mask = media_desc->hw_engine_mask;
- gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
- gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
/*
* FIXME: At the moment multi-tile and standalone media are
@@ -757,6 +768,25 @@ static void xe_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
}
+/*
+ * Probe the PCI device, initialize various parts of the driver.
+ *
+ * Fault injection is used to test the error paths of some initialization
+ * functions called either directly from xe_pci_probe() or indirectly for
+ * example through xe_device_probe(). Those functions use the kernel fault
+ * injection capabilities infrastructure, see
+ * Documentation/fault-injection/fault-injection.rst for details. The macro
+ * ALLOW_ERROR_INJECTION() is used to conditionally skip function execution
+ * at runtime and use a provided return value. The first requirement for
+ * error injectable functions is proper handling of the error code by the
+ * caller for recovery, which is always the case here. The second
+ * requirement is that no state is changed before the first error return.
+ * It is not strictly fulfilled for all initialization functions using the
+ * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those
+ * error cases at probe time, the error code is simply propagated up by the
+ * caller. Therefore there is no consequence on those specific callers when
+ * function error injection skips the whole function.
+ */
static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
const struct xe_device_desc *desc = (const void *)ent->driver_data;
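
The comment block above explains why a handful of init helpers in this series (xe_pm_init_early(), xe_sriov_init(), xe_tile_init_early(), xe_uc_fw_init() further down) gain ALLOW_ERROR_INJECTION(..., ERRNO) annotations. A minimal sketch of the pattern, assuming the standard error-injection infrastructure; demo_subsys_init_early() is a made-up name:

#include <linux/errno.h>
#include <linux/error-injection.h>

/*
 * Returns 0 or a negative errno and changes no state before its first
 * possible error return, so letting error injection skip the whole body
 * only exercises the caller's existing unwind path.
 */
int demo_subsys_init_early(void)
{
	return 0;
}
ALLOW_ERROR_INJECTION(demo_subsys_init_early, ERRNO);

With CONFIG_FUNCTION_ERROR_INJECTION and the fail_function debugfs interface enabled, the annotated function can then be made to return an errno at runtime, exercising the probe error path without needing real hardware failures.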
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 7397d556996a..d95d9835de42 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -44,7 +44,7 @@ static int pcode_mailbox_status(struct xe_tile *tile)
[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
};
- err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+ err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK;
if (err) {
drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err,
err_decode[err].str ?: "Unknown");
@@ -58,7 +58,7 @@ static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *d
unsigned int timeout_ms, bool return_data,
bool atomic)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
int err;
if (tile_to_xe(tile)->info.skip_pcode)
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 79b7042c4534..d08574c4cdb8 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -23,6 +23,7 @@ enum xe_platform {
XE_METEORLAKE,
XE_LUNARLAKE,
XE_BATTLEMAGE,
+ XE_PANTHERLAKE,
};
enum xe_subplatform {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 33eb039053e4..40f7c844ed44 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -5,6 +5,7 @@
#include "xe_pm.h"
+#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <drm/drm_managed.h>
@@ -263,6 +264,7 @@ int xe_pm_init_early(struct xe_device *xe)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
/**
* xe_pm_init - Initialize Xe Power Management
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 848da8e68c7a..170ae72d1a7b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <drm/ttm/ttm_placement.h>
+#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
@@ -23,6 +24,7 @@
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
static const u16 xe_to_user_engine_class[] = {
[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
@@ -83,24 +85,22 @@ static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
}
static void
-__read_timestamps(struct xe_gt *gt,
- struct xe_reg lower_reg,
- struct xe_reg upper_reg,
- u64 *engine_ts,
- u64 *cpu_ts,
- u64 *cpu_delta,
- __ktime_func_t cpu_clock)
+hwe_read_timestamp(struct xe_hw_engine *hwe, u64 *engine_ts, u64 *cpu_ts,
+ u64 *cpu_delta, __ktime_func_t cpu_clock)
{
+ struct xe_mmio *mmio = &hwe->gt->mmio;
u32 upper, lower, old_upper, loop = 0;
+ struct xe_reg upper_reg = RING_TIMESTAMP_UDW(hwe->mmio_base),
+ lower_reg = RING_TIMESTAMP(hwe->mmio_base);
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
do {
*cpu_delta = local_clock();
*cpu_ts = cpu_clock();
- lower = xe_mmio_read32(gt, lower_reg);
+ lower = xe_mmio_read32(mmio, lower_reg);
*cpu_delta = local_clock() - *cpu_delta;
old_upper = upper;
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
} while (upper != old_upper && loop++ < 2);
*engine_ts = (u64)upper << 32 | lower;
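
hwe_read_timestamp() above keeps the classic high/low/high dance for a 64-bit counter exposed as two 32-bit registers: read the upper half, read the lower half, re-read the upper half and retry if it changed, so a carry between the two reads cannot produce a torn value. A generic sketch of the loop; read_hi()/read_lo() are hypothetical accessors:

#include <linux/types.h>

/* hypothetical accessors for a 64-bit counter split across two 32-bit halves */
u32 read_hi(void);
u32 read_lo(void);

static u64 read_split_counter(void)
{
	u32 hi, lo, old_hi;
	int retries = 0;

	hi = read_hi();
	do {
		lo = read_lo();
		old_hi = hi;
		hi = read_hi();	/* re-read: did the low word wrap meanwhile? */
	} while (hi != old_hi && retries++ < 2);

	return (u64)hi << 32 | lo;
}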
@@ -117,6 +117,7 @@ query_engine_cycles(struct xe_device *xe,
__ktime_func_t cpu_clock;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
+ unsigned int fw_ref;
if (query->size == 0) {
query->size = size;
@@ -149,18 +150,16 @@ query_engine_cycles(struct xe_device *xe,
if (!hwe)
return -EINVAL;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -EIO;
+ }
- __read_timestamps(gt,
- RING_TIMESTAMP(hwe->mmio_base),
- RING_TIMESTAMP_UDW(hwe->mmio_base),
- &resp.engine_cycles,
- &resp.cpu_timestamp,
- &resp.cpu_delta,
- cpu_clock);
+ hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
+ &resp.cpu_delta, cpu_clock);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
if (GRAPHICS_VER(xe) >= 20)
resp.width = 64;
@@ -168,16 +167,10 @@ query_engine_cycles(struct xe_device *xe,
resp.width = 36;
/* Only write to the output fields of user query */
- if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
- return -EFAULT;
-
- if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
- return -EFAULT;
-
- if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
- return -EFAULT;
-
- if (put_user(resp.width, &query_ptr->width))
+ if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp) ||
+ put_user(resp.cpu_delta, &query_ptr->cpu_delta) ||
+ put_user(resp.engine_cycles, &query_ptr->engine_cycles) ||
+ put_user(resp.width, &query_ptr->width))
return -EFAULT;
return 0;
@@ -458,12 +451,23 @@ static int query_hwconfig(struct xe_device *xe,
static size_t calc_topo_query_size(struct xe_device *xe)
{
- return xe->info.gt_count *
- (4 * sizeof(struct drm_xe_query_topology_mask) +
- sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
- sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+ struct xe_gt *gt;
+ size_t query_size = 0;
+ int id;
+
+ for_each_gt(gt, xe, id) {
+ query_size += 3 * sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
+
+ /* L3bank mask may not be available for some GTs */
+ if (!XE_WA(gt, no_media_l3))
+ query_size += sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
+ }
+
+ return query_size;
}
static int copy_mask(void __user **ptr,
@@ -516,11 +520,18 @@ static int query_gt_topology(struct xe_device *xe,
if (err)
return err;
- topo.type = DRM_XE_TOPO_L3_BANK;
- err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
- sizeof(gt->fuse_topo.l3_bank_mask));
- if (err)
- return err;
+ /*
+ * If the kernel doesn't have a way to obtain a correct L3bank
+ * mask, then it's better to omit L3 from the query rather than
+ * reporting bogus or zeroed information to userspace.
+ */
+ if (!XE_WA(gt, no_media_l3)) {
+ topo.type = DRM_XE_TOPO_L3_BANK;
+ err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
+ sizeof(gt->fuse_topo.l3_bank_mask));
+ if (err)
+ return err;
+ }
topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
DRM_XE_TOPO_SIMD16_EU_PER_DSS :
@@ -659,7 +670,7 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_id = u->oa_unit_id;
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
- du->capabilities = DRM_XE_OA_CAPS_BASE;
+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 440ac572f6e5..e1a0e27cda14 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -15,6 +15,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
@@ -164,7 +165,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
val = 0;
@@ -180,34 +181,34 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
if (entry->reg.mcr)
xe_gt_mcr_multicast_write(gt, reg_mcr, val);
else
- xe_mmio_write32(gt, reg, val);
+ xe_mmio_write32(&gt->mmio, reg, val);
}
void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
{
struct xe_reg_sr_entry *entry;
unsigned long reg;
- int err;
+ unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
- err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_force_wake;
xa_for_each(&sr->xa, reg, entry)
apply_one_mmio(gt, entry);
- err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return;
err_force_wake:
- xe_gt_err(gt, "Failed to apply, err=%d\n", err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
}
void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
@@ -220,15 +221,15 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
u32 mmio_base = hwe->mmio_base;
unsigned long reg;
unsigned int slot = 0;
- int err;
+ unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
- err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_force_wake;
p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
@@ -241,7 +242,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
}
xe_reg_whitelist_print_entry(&p, 0, reg, entry);
- xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
+ xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
reg | entry->set_bits);
slot++;
}
@@ -250,16 +251,16 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
u32 addr = RING_NOPID(mmio_base).addr;
- xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+ xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
}
- err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return;
err_force_wake:
- drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n");
}
/**
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 86c705d18c0d..b13d4d62f0b1 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -196,7 +196,7 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
*gt = (*hwe)->gt;
*xe = gt_to_xe(*gt);
break;
- };
+ }
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index fe2cb2a96f78..e055bed7ae55 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -53,7 +53,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
if (IS_ERR(bo)) {
drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
PTR_ERR(bo));
- return (struct xe_sa_manager *)bo;
+ return ERR_CAST(bo);
}
sa_manager->bo = bo;
sa_manager->is_iomem = bo->vmap.is_iomem;
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index eeccc1c318ae..1905ca590965 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -280,7 +280,7 @@ void xe_sched_job_arm(struct xe_sched_job *job)
fence = &chain->base;
}
- job->fence = fence;
+ job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */
drm_sched_job_arm(&job->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 0d3f76fb05ce..f13f333f00be 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -40,7 +40,6 @@ struct xe_sched_job {
* @fence: dma fence to indicate completion. 1 way relationship - job
* can safely reference fence, fence cannot safely reference job.
*/
-#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS
struct dma_fence *fence;
/** @user_fence: write back value when BB is complete */
struct {
@@ -63,7 +62,7 @@ struct xe_sched_job {
struct xe_sched_job_snapshot {
u16 batch_addr_len;
- u64 batch_addr[];
+ u64 batch_addr[] __counted_by(batch_addr_len);
};
#endif
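
The __counted_by(batch_addr_len) annotation added above ties the flexible array's bounds to the field that stores its length, which FORTIFY_SOURCE and UBSAN bounds checking can then enforce. A small sketch of the allocation pattern that usually accompanies it, assuming the struct_size() helper; demo_snapshot is an illustrative type, not the driver's:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_snapshot {
	u16 batch_addr_len;
	u64 batch_addr[] __counted_by(batch_addr_len);
};

static struct demo_snapshot *demo_snapshot_alloc(u16 n, gfp_t gfp)
{
	struct demo_snapshot *s;

	s = kzalloc(struct_size(s, batch_addr, n), gfp);
	if (!s)
		return NULL;

	/* set the counter before touching the array so bounds checks agree */
	s->batch_addr_len = n;
	return s;
}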
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 5a1d65e4f19f..ef10782af656 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "regs/xe_regs.h"
@@ -35,7 +37,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
static bool test_is_vf(struct xe_device *xe)
{
- u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+ u32 value = xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);
return value & VF_CAP;
}
@@ -119,6 +121,7 @@ int xe_sriov_init(struct xe_device *xe)
return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe);
}
+ALLOW_ERROR_INJECTION(xe_sriov_init, ERRNO); /* See xe_pci_probe() */
/**
* xe_sriov_print_info - Print basic SR-IOV information.
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 2e72c06fd40d..a90480c6aecf 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -83,6 +83,8 @@ static void user_fence_worker(struct work_struct *w)
XE_WARN_ON("Copy to user failed");
kthread_unuse_mm(ufence->mm);
mmput(ufence->mm);
+ } else {
+ drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n");
}
wake_up_all(&ufence->xe->ufence_wq);
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index dda5268507d8..07cf7cfe4abd 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "xe_device.h"
@@ -129,6 +131,7 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
static int tile_ttm_mgr_init(struct xe_tile *tile)
{
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 8573d7a87d84..91130ad8999c 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -21,6 +21,7 @@
#include "xe_vm.h"
#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
+#define __dev_name_tile(tile) __dev_name_xe(tile_to_xe((tile)))
#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt)))
#define __dev_name_eq(q) __dev_name_gt((q)->gt)
@@ -342,12 +343,12 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
);
TRACE_EVENT(xe_reg_rw,
- TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len),
+ TP_PROTO(struct xe_mmio *mmio, bool write, u32 reg, u64 val, int len),
- TP_ARGS(gt, write, reg, val, len),
+ TP_ARGS(mmio, write, reg, val, len),
TP_STRUCT__entry(
- __string(dev, __dev_name_gt(gt))
+ __string(dev, __dev_name_tile(mmio->tile))
__field(u64, val)
__field(u32, reg)
__field(u16, write)
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 9b1a1d4304ae..30a3cfbaaa09 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm,
),
TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev),
- __entry->vm, __entry->asid)
+ __entry->vm, __entry->asid)
);
DEFINE_EVENT(xe_vm, xe_vm_kill,
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index f7113cf6109d..423856cc18d4 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -60,7 +60,7 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u64 stolen_size;
u64 tile_offset;
@@ -94,7 +94,7 @@ static u32 get_wopcm_size(struct xe_device *xe)
u32 wopcm_size;
u64 val;
- val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
+ val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
switch (val) {
@@ -119,7 +119,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
u32 stolen_size, wopcm_size;
u32 ggc, gms;
- ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
+ ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);
/*
* Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
@@ -159,7 +159,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
stolen_size -= wopcm_size;
if (media_gt && XE_WA(media_gt, 14019821291)) {
- u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
+ u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
& ~GENMASK_ULL(5, 0);
/*
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 0d5e04158917..d449de0fb6ec 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -33,7 +33,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
{ XE_RTP_NAME("Tuning: L3 cache - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
@@ -43,7 +43,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
SET(CCCHKNREG1, L3CMPCTRL))
},
{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
},
@@ -52,7 +52,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
@@ -61,7 +61,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
COMPMEMRD256BOVRFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
@@ -71,7 +71,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
- XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index d431d0031185..fb0eda3d5682 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/fault-inject.h>
#include <linux/firmware.h>
#include <drm/drm_managed.h>
@@ -796,6 +797,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
return err;
}
+ALLOW_ERROR_INJECTION(xe_uc_fw_init, ERRNO); /* See xe_pci_probe() */
static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
{
@@ -806,6 +808,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
{
struct xe_device *xe = uc_fw_to_xe(uc_fw);
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+ struct xe_mmio *mmio = &gt->mmio;
u64 src_offset;
u32 dma_ctrl;
int ret;
@@ -814,34 +817,34 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
/* Set the source address for the uCode */
src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
- xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
- xe_mmio_write32(gt, DMA_ADDR_0_HIGH,
+ xe_mmio_write32(mmio, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
+ xe_mmio_write32(mmio, DMA_ADDR_0_HIGH,
upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT);
/* Set the DMA destination */
- xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
- xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+ xe_mmio_write32(mmio, DMA_ADDR_1_LOW, offset);
+ xe_mmio_write32(mmio, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
/*
* Set the transfer size. The header plus uCode will be copied to WOPCM
* via DMA, excluding any other components
*/
- xe_mmio_write32(gt, DMA_COPY_SIZE,
+ xe_mmio_write32(mmio, DMA_COPY_SIZE,
sizeof(struct uc_css_header) + uc_fw->ucode_size);
/* Start the DMA */
- xe_mmio_write32(gt, DMA_CTRL,
+ xe_mmio_write32(mmio, DMA_CTRL,
_MASKED_BIT_ENABLE(dma_flags | START_DMA));
/* Wait for DMA to finish */
- ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
+ ret = xe_mmio_wait32(mmio, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
false);
if (ret)
drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
/* Disable the bits once DMA is over */
- xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
+ xe_mmio_write32(mmio, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
return ret;
}
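uc_fw_xfer() above programs the DMA source and destination, sets the copy size, starts the transfer, and then polls DMA_CTRL until START_DMA clears or the wait times out. Below is a self-contained mock of that poll-until-clear pattern, with invented types rather than the xe_mmio_wait32() API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define START_DMA_BIT	(1u << 0)

struct mock_dma {
	uint32_t ctrl;
	int ticks_until_done;	/* fake hardware: clears START after N polls */
};

static uint32_t mock_read_ctrl(struct mock_dma *dma)
{
	if (dma->ticks_until_done > 0 && --dma->ticks_until_done == 0)
		dma->ctrl &= ~START_DMA_BIT;
	return dma->ctrl;
}

/* Returns 0 on completion, -1 on timeout; *out holds the last value read. */
static int wait_for_clear(struct mock_dma *dma, uint32_t mask,
			  unsigned int max_polls, uint32_t *out)
{
	unsigned int i;

	for (i = 0; i < max_polls; i++) {
		*out = mock_read_ctrl(dma);
		if (!(*out & mask))
			return 0;
		/* A real driver would sleep or delay between polls here. */
	}
	return -1;
}

int main(void)
{
	struct mock_dma dma = { .ctrl = START_DMA_BIT, .ticks_until_done = 5 };
	uint32_t last;
	int ret = wait_for_clear(&dma, START_DMA_BIT, 100, &last);

	printf("ret=%d last_ctrl=0x%x\n", ret, last);
	return 0;
}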
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 80ba2fc78837..b1f81dca610d 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -169,7 +169,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
u64 offset_hi, offset_lo;
u32 nodes, num_enabled;
- reg = xe_mmio_read32(gt, MIRROR_FUSE3);
+ reg = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg);
num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */
@@ -185,7 +185,8 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
/* We don't expect any holes */
- xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),
+ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(&gt_to_tile(gt)->mmio, GSMBASE) -
+ ccs_size),
"Hole between CCS and GSM.\n");
} else {
reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
@@ -219,8 +220,8 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_gt *gt = tile->primary_gt;
+ unsigned int fw_ref;
u64 offset;
- int err;
u32 reg;
if (IS_SRIOV_VF(xe)) {
@@ -239,9 +240,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
return 0;
}
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
/* actual size */
if (unlikely(xe->info.platform == XE_DG1)) {
@@ -257,13 +258,15 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
if (xe->info.has_flat_ccs) {
offset = get_flat_ccs_offset(gt, *tile_size);
} else {
- offset = xe_mmio_read64_2x32(gt, GSMBASE);
+ offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE);
}
/* remove the tile offset so we have just the available size */
*vram_size = offset - *tile_offset;
- return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ return 0;
}
static void vram_fini(void *arg)
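The tile_vram_size() hunk above also reflects the new forcewake convention: xe_force_wake_get() now returns a reference/domain mask (zero meaning nothing was woken, treated as -ETIMEDOUT) which is later handed back to xe_force_wake_put(), instead of returning an errno. A simplified standalone model of that convention, with invented names:

#include <stdint.h>
#include <stdio.h>

struct mock_fw {
	uint32_t awake_mask;	/* domains currently held awake */
};

/* Returns the mask of domains actually woken; 0 means complete failure. */
static uint32_t mock_fw_get(struct mock_fw *fw, uint32_t wanted)
{
	uint32_t got = wanted;	/* pretend every requested domain woke up */

	fw->awake_mask |= got;
	return got;
}

static void mock_fw_put(struct mock_fw *fw, uint32_t ref)
{
	fw->awake_mask &= ~ref;
}

int main(void)
{
	struct mock_fw fw = { 0 };
	uint32_t ref = mock_fw_get(&fw, 0x1);

	if (!ref) {
		fprintf(stderr, "forcewake timed out\n");
		return 1;
	}
	/* ... touch registers while the domain is awake ... */
	mock_fw_put(&fw, ref);
	printf("awake_mask after put: 0x%x\n", fw.awake_mask);
	return 0;
}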
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 353936a0f877..02cf647f86d8 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -8,6 +8,7 @@
#include <drm/drm_managed.h>
#include <kunit/visibility.h>
#include <linux/compiler_types.h>
+#include <linux/fault-inject.h>
#include <generated/xe_wa_oob.h>
@@ -251,6 +252,34 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
+ /* Xe3_LPG */
+
+ { XE_RTP_NAME("14021871409"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)),
+ XE_RTP_ACTIONS(SET(UNSLCGCTL9454, LSCFE_CLKGATE_DIS))
+ },
+
+ /* Xe3_LPM */
+
+ { XE_RTP_NAME("16021867713"),
+ XE_RTP_RULES(MEDIA_VERSION(3000),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+ { XE_RTP_NAME("16021865536"),
+ XE_RTP_RULES(MEDIA_VERSION(3000),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+ { XE_RTP_NAME("14021486841"),
+ XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+
{}
};
@@ -567,6 +596,18 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ /* Xe3_LPG */
+
+ { XE_RTP_NAME("14021402888"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+ },
+ { XE_RTP_NAME("18034896535"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+ },
+
{}
};
@@ -742,6 +783,18 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
+ /* Xe3_LPG */
+ { XE_RTP_NAME("14021490052"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(FF_MODE,
+ DIS_MESH_PARTIAL_AUTOSTRIP |
+ DIS_MESH_AUTOSTRIP),
+ SET(VFLSKPD,
+ DIS_PARTIAL_AUTOSTRIP |
+ DIS_AUTOSTRIP))
+ },
+
{}
};
@@ -854,6 +907,7 @@ int xe_wa_init(struct xe_gt *gt)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
{
@@ -891,11 +945,11 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
*/
void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
if (IS_SRIOV_VF(tile->xe))
return;
- if (XE_WA(mmio, 22010954014))
+ if (XE_WA(tile->primary_gt, 22010954014))
xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
}
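xe_wa_apply_tile_workarounds() above now does its read-modify-write through the tile's struct xe_mmio while the XE_WA() check still keys off the primary GT. For reference, a standalone mock of the rmw idiom behind xe_mmio_rmw32(), using mock names and a made-up register value; the in-tree helper may differ in details such as its return value:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg = 0x0000f00d;	/* stand-in for a real MMIO register */

static uint32_t mock_read32(void) { return fake_reg; }
static void mock_write32(uint32_t val) { fake_reg = val; }

static uint32_t mock_rmw32(uint32_t clr, uint32_t set)
{
	uint32_t old = mock_read32();

	mock_write32((old & ~clr) | set);
	return old;	/* mock detail: hand back the previous value */
}

int main(void)
{
	uint32_t old = mock_rmw32(0, 1u << 17);	/* e.g. set one disable bit */

	printf("old=0x%x new=0x%x\n", old, fake_reg);
	return 0;
}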
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 920ca5060146..bcd04464b85e 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -33,7 +33,11 @@
GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
22019338487_display PLATFORM(LUNARLAKE)
16023588340 GRAPHICS_VERSION(2001)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
+no_media_l3 MEDIA_VERSION(3000)
+14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index d3a99157e523..ada0d0aa6b74 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -5,6 +5,8 @@
#include "xe_wopcm.h"
+#include <linux/fault-inject.h>
+
#include "regs/xe_guc_regs.h"
#include "xe_device.h"
#include "xe_force_wake.h"
@@ -123,8 +125,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
static bool __wopcm_regs_locked(struct xe_gt *gt,
u32 *guc_wopcm_base, u32 *guc_wopcm_size)
{
- u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
- u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
+ u32 reg_base = xe_mmio_read32(&gt->mmio, DMA_GUC_WOPCM_OFFSET);
+ u32 reg_size = xe_mmio_read32(&gt->mmio, GUC_WOPCM_SIZE);
if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
!(reg_base & GUC_WOPCM_OFFSET_VALID))
@@ -150,13 +152,13 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
- err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
+ err = xe_mmio_write32_and_verify(&gt->mmio, GUC_WOPCM_SIZE, size, mask,
size | GUC_WOPCM_SIZE_LOCKED);
if (err)
goto err_out;
mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
- err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
+ err = xe_mmio_write32_and_verify(&gt->mmio, DMA_GUC_WOPCM_OFFSET,
base | huc_agent, mask,
base | huc_agent |
GUC_WOPCM_OFFSET_VALID);
@@ -169,10 +171,10 @@ err_out:
drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
DMA_GUC_WOPCM_OFFSET.addr,
- xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
+ xe_mmio_read32(&gt->mmio, DMA_GUC_WOPCM_OFFSET));
drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
GUC_WOPCM_SIZE.addr,
- xe_mmio_read32(gt, GUC_WOPCM_SIZE));
+ xe_mmio_read32(&gt->mmio, GUC_WOPCM_SIZE));
return err;
}
@@ -268,3 +270,4 @@ check:
return ret;
}
+ALLOW_ERROR_INJECTION(xe_wopcm_init, ERRNO); /* See xe_pci_probe() */
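The xe_wopcm.c hunks switch the locked-register checks and programming to the &gt->mmio accessors; the underlying idiom is unchanged: write a value, read it back under a mask, and fail if the masked read-back does not match. A minimal mock of that write-and-verify pattern, with an invented mask and helper names rather than xe_mmio_write32_and_verify() itself:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg;	/* stand-in for a lockable hardware register */

static void mock_write32(uint32_t val) { fake_reg = val; }
static uint32_t mock_read32(void) { return fake_reg; }

/* Returns 0 if the masked read-back equals @expect, a negative value otherwise. */
static int mock_write32_and_verify(uint32_t val, uint32_t mask, uint32_t expect)
{
	uint32_t readback;

	mock_write32(val);
	readback = mock_read32() & mask;

	return readback == expect ? 0 : -5;
}

int main(void)
{
	/* Pretend to program a size field plus a LOCKED bit, then verify both. */
	uint32_t size = 0x00200000, locked = 1u << 0;
	int err = mock_write32_and_verify(size | locked,
					  0xfff00000 | locked,
					  (size & 0xfff00000) | locked);

	printf("verify %s\n", err ? "failed" : "ok");
	return 0;
}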