author     Dave Airlie <airlied@redhat.com>  2024-11-02 04:44:02 +1000
committer  Dave Airlie <airlied@redhat.com>  2024-11-02 04:44:27 +1000
commit     f99c7cca2f712d11a67148cfbe463fdefeb82dc5
tree       93e3e339e145a2ae09891e86c6f94a4e6520fcac
parent     427360718e5b9a6e5b5936e2d3d8ae768da54811
parent     fe05cee4d9533892210e1ee90147175d87e7c053
Merge tag 'drm-xe-fixes-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
- Fix missing HPD interrupt enabling, bringing one PM refactor with it (Imre / Maarten)
- Workaround LNL GGTT invalidation not being visible to GuC (Matthew Brost)
- Avoid getting jobs stuck without a protecting timeout (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tsbftadm7owyizzdaqnqu7u4tqggxgeqeztlfvmj5fryxlfomi@5m5bfv2zvzmw
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c  | 65
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.h  |  8
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c             | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c       | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c               |  6
5 files changed, 72 insertions, 35 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 75736faf2a80..c6e0c8d77a70 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
}
/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
-void xe_display_pm_runtime_suspend(struct xe_device *xe)
-{
- if (!xe->info.probe_display)
- return;
-
- if (xe->d3cold.allowed)
- xe_display_pm_suspend(xe, true);
-
- intel_hpd_poll_enable(xe);
-}
-
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
bool s2idle = suspend_to_idle();
@@ -353,28 +342,38 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
intel_dmc_suspend(xe);
+
+ if (runtime && has_display(xe))
+ intel_hpd_poll_enable(xe);
}
-void xe_display_pm_suspend_late(struct xe_device *xe)
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+ __xe_display_pm_suspend(xe, false);
+}
+
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
- bool s2idle = suspend_to_idle();
if (!xe->info.probe_display)
return;
- intel_power_domains_suspend(xe, s2idle);
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_suspend(xe, true);
+ return;
+ }
- intel_display_power_suspend_late(xe);
+ intel_hpd_poll_enable(xe);
}
-void xe_display_pm_runtime_resume(struct xe_device *xe)
+void xe_display_pm_suspend_late(struct xe_device *xe)
{
+ bool s2idle = suspend_to_idle();
if (!xe->info.probe_display)
return;
- intel_hpd_poll_disable(xe);
+ intel_power_domains_suspend(xe, s2idle);
- if (xe->d3cold.allowed)
- xe_display_pm_resume(xe, true);
+ intel_display_power_suspend_late(xe);
}
void xe_display_pm_resume_early(struct xe_device *xe)
@@ -387,7 +386,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
intel_power_domains_resume(xe);
}
-void xe_display_pm_resume(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
@@ -411,9 +410,11 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_display_driver_resume(xe);
drm_kms_helper_poll_enable(&xe->drm);
intel_display_driver_enable_user_access(xe);
- intel_hpd_poll_disable(xe);
}
+ if (has_display(xe))
+ intel_hpd_poll_disable(xe);
+
intel_opregion_resume(display);
intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
@@ -421,6 +422,26 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_power_domains_enable(xe);
}
+void xe_display_pm_resume(struct xe_device *xe)
+{
+ __xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_resume(xe, true);
+ return;
+ }
+
+ intel_hpd_init(xe);
+ intel_hpd_poll_disable(xe);
+}
+
+
static void display_device_remove(struct drm_device *dev, void *arg)
{
struct xe_device *xe = arg;
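
The xe_display.c changes above fold the runtime paths into the shared __xe_display_pm_suspend()/__xe_display_pm_resume() helpers: the full sequence only runs when D3cold is allowed, otherwise runtime suspend merely arms HPD polling and runtime resume re-initializes HPD and disables polling. Below is a minimal, self-contained sketch of that branching; the struct and helpers are stand-ins for illustration, not the real driver API.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the relevant bits of struct xe_device. */
struct sketch_xe {
	bool probe_display;	/* xe->info.probe_display */
	bool d3cold_allowed;	/* xe->d3cold.allowed */
};

/* Stubs standing in for the shared helper and the HPD polling call. */
static void full_runtime_suspend(struct sketch_xe *xe)
{
	printf("__xe_display_pm_suspend(xe, true)\n");
}

static void hpd_poll_enable(struct sketch_xe *xe)
{
	printf("intel_hpd_poll_enable(xe)\n");
}

/* Mirrors the shape of the new xe_display_pm_runtime_suspend(): the full
 * display suspend sequence runs only when D3cold is allowed; otherwise HPD
 * polling is armed so hotplug events are still noticed while suspended. */
static void runtime_suspend(struct sketch_xe *xe)
{
	if (!xe->probe_display)
		return;

	if (xe->d3cold_allowed) {
		full_runtime_suspend(xe);
		return;
	}

	hpd_poll_enable(xe);
}

int main(void)
{
	struct sketch_xe xe = { .probe_display = true, .d3cold_allowed = false };

	runtime_suspend(&xe);	/* non-D3cold path: only HPD polling */
	return 0;
}
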
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 53d727fd792b..bed55fd26f30 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
void xe_display_irq_reset(struct xe_device *xe);
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
+void xe_display_pm_suspend(struct xe_device *xe);
void xe_display_pm_suspend_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
-void xe_display_pm_resume(struct xe_device *xe, bool runtime);
+void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
void xe_display_pm_runtime_resume(struct xe_device *xe);
@@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
static inline void xe_display_irq_reset(struct xe_device *xe) {}
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
-static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
-static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 2895f154654c..ff19eca5d358 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -397,6 +397,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
+ struct xe_device *xe = tile_to_xe(ggtt->tile);
+
+ /*
+ * XXX: Barrier for GGTT pages. Unsure exactly why this required but
+ * without this LNL is having issues with the GuC reading scratch page
+ * vs. correct GGTT page. Not particularly a hot code path so blindly
+ * do a mmio read here which results in GuC reading correct GGTT page.
+ */
+ xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+
/* Each GT in a tile has its own TLB to cache GGTT lookups */
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
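
The xe_ggtt.c hunk is the LNL workaround from the merge summary: an MMIO read (VF_CAP_REG on the root GT in the patch) is issued before the per-GT TLB invalidations so previously posted GGTT updates are observed by the GuC. A hedged, standalone sketch of that read-back-before-invalidate ordering, using dummy registers rather than the real xe_mmio helpers:

#include <stdint.h>
#include <stdio.h>

/* Dummy register file standing in for the GT MMIO window. */
static volatile uint32_t regs[2];

static void mmio_write32(unsigned int reg, uint32_t val)
{
	regs[reg] = val;
}

static uint32_t mmio_read32(unsigned int reg)
{
	return regs[reg];
}

/* Same shape as the patched xe_ggtt_invalidate(): read a register on the
 * same interface first so earlier posted GGTT writes complete before the
 * TLB invalidation is kicked off. */
static void ggtt_invalidate(void)
{
	(void)mmio_read32(0);	/* barrier-style read back (VF_CAP_REG in the patch) */
	mmio_write32(1, 1);	/* stand-in for ggtt_invalidate_gt_tlb() */
}

int main(void)
{
	ggtt_invalidate();
	printf("TLB invalidation issued after the MMIO read back\n");
	return 0;
}
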
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index d333be9c4227..f903b0772722 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -916,12 +916,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
- u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
- u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+ u32 ctx_timestamp, ctx_job_timestamp;
u32 timeout_ms = q->sched_props.job_timeout_ms;
u32 diff;
u64 running_time_ms;
+ if (!xe_sched_job_started(job)) {
+ xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
+ xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+ q->guc->id);
+
+ return xe_sched_invalidate_job(job, 2);
+ }
+
+ ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+ ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+
/*
* Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
* possible overflows with a high timeout.
@@ -1049,10 +1059,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
exec_queue_killed_or_banned_or_wedged(q) ||
exec_queue_destroyed(q);
- /* Job hasn't started, can't be timed out */
- if (!skip_timeout_check && !xe_sched_job_started(job))
- goto rearm;
-
/*
* XXX: Sampling timeout doesn't work in wedged mode as we have to
* modify scheduling state to read timestamp. We could read the
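
The xe_guc_submit.c change moves the "job not started" case out of guc_exec_queue_timedout_job() and into check_timeout(): instead of silently rearming the timer, an unstarted job is now invalidated with a small retry budget, so it can no longer sit forever without a protecting timeout, and only started jobs sample the LRC timestamps. A standalone sketch of that check order plus the wrap-safe u32 timestamp math the comment alludes to (the 19.2 MHz rate comes from the patch context; the struct and helpers are stand-ins):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sketch_job {
	bool started;		/* xe_sched_job_started() */
	uint32_t start_ts;	/* ctx_job_timestamp */
	uint32_t now_ts;	/* ctx_timestamp */
};

/* Unsigned 32-bit subtraction yields the correct elapsed tick count even if
 * the counter wrapped once between the two samples (~223 s at 19.2 MHz). */
static uint64_t ticks_to_ms(uint32_t start, uint32_t now)
{
	uint32_t diff = now - start;

	return (uint64_t)diff * 1000 / 19200000;
}

/* Mirrors the new check order: decide on unstarted jobs first, then fall
 * through to the timestamp comparison for started ones. */
static bool job_timed_out(const struct sketch_job *job, uint32_t timeout_ms)
{
	if (!job->started)
		return true;	/* patch: xe_sched_invalidate_job(job, 2) */

	return ticks_to_ms(job->start_ts, job->now_ts) >= timeout_ms;
}

int main(void)
{
	/* Started job whose counter wrapped between the two samples. */
	struct sketch_job job = {
		.started = true,
		.start_ts = 0xFFF00000u,
		.now_ts = 0x00100000u,
	};

	printf("elapsed ~%llu ms, timed out: %d\n",
	       (unsigned long long)ticks_to_ms(job.start_ts, job.now_ts),
	       job_timed_out(&job, 5000));
	return 0;
}
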
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 7cf2160fe040..33eb039053e4 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
- xe_display_pm_suspend(xe, false);
+ xe_display_pm_suspend(xe);
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
@@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
if (err) {
- xe_display_pm_resume(xe, false);
+ xe_display_pm_resume(xe);
goto err;
}
}
@@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
- xe_display_pm_resume(xe, false);
+ xe_display_pm_resume(xe);
err = xe_bo_restore_user(xe);
if (err)