summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2024-11-09 05:14:28 +1000
committer Dave Airlie <airlied@redhat.com> 2024-11-09 05:14:29 +1000
commit 1a6bbc4d9e55d6c9df2dfe7d4f2705a544d8ca13 (patch)
tree b2807c1da6f59c51b23d193d255db06cbf572a25
parent 9b984a71c240ed9287d6358109f6a0c6ab5bba32 (diff)
parent 514447a1219021298329ce586536598c3b4b2dc0 (diff)
Merge tag 'drm-xe-fixes-2024-11-08' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
- Fix ccs_mode setting for Xe2 and later (Balasubramani)
- Synchronize ccs_mode setting with client creation (Balasubramani)
- Apply scheduling WA for LNL in additional places as needed (Nirmoy)
- Fix leak and lock handling in error paths of xe_exec ioctl (Matthew Brost)
- Fix GGTT allocation leak leading to eventual crash in SR-IOV (Michal Wajdeczko)
- Move run_ticks update out of job handling to avoid synchronization with reader (Lucas)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/4ffcebtluaaaohquxfyf5babpihmtscxwad3jjmt5nggwh2xpm@ztw67ucywttg
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_device.c | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_device.h | 14
-rw-r--r-- drivers/gpu/drm/xe/xe_device_types.h | 9
-rw-r--r-- drivers/gpu/drm/xe/xe_exec.c | 13
-rw-r--r-- drivers/gpu/drm/xe/xe_exec_queue.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 15
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ct.c | 11
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_submit.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_wait_user_fence.c | 7
12 files changed, 54 insertions, 41 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 00ad34ed73a5..bd604b9f08e4 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -517,7 +517,7 @@
* [4-6] RSVD
* [7] Disabled
*/
-#define CCS_MODE XE_REG(0x14804)
+#define CCS_MODE XE_REG(0x14804, XE_REG_OPTION_MASKED)
#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */
#define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */
#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 10fd4601b9f2..a1987b554a8d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -87,10 +87,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
mutex_init(&xef->exec_queue.lock);
xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
- spin_lock(&xe->clients.lock);
- xe->clients.count++;
- spin_unlock(&xe->clients.lock);
-
file->driver_priv = xef;
kref_init(&xef->refcount);
@@ -107,17 +103,12 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
static void xe_file_destroy(struct kref *ref)
{
struct xe_file *xef = container_of(ref, struct xe_file, refcount);
- struct xe_device *xe = xef->xe;
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
xa_destroy(&xef->vm.xa);
mutex_destroy(&xef->vm.lock);
- spin_lock(&xe->clients.lock);
- xe->clients.count--;
- spin_unlock(&xe->clients.lock);
-
xe_drm_client_put(xef->client);
kfree(xef->process_name);
kfree(xef);
@@ -333,7 +324,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->info.force_execlist = xe_modparam.force_execlist;
spin_lock_init(&xe->irq.lock);
- spin_lock_init(&xe->clients.lock);
init_waitqueue_head(&xe->ufence_wq);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 894f04770454..34620ef855c0 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -178,4 +178,18 @@ void xe_device_declare_wedged(struct xe_device *xe);
struct xe_file *xe_file_get(struct xe_file *xef);
void xe_file_put(struct xe_file *xef);
+/*
+ * Occasionally it is seen that the G2H worker starts running after a delay of more than
+ * a second even after being queued and activated by the Linux workqueue subsystem. This
+ * leads to G2H timeout error. The root cause of issue lies with scheduling latency of
+ * Lunarlake Hybrid CPU. Issue disappears if we disable Lunarlake atom cores from BIOS
+ * and this is beyond xe kmd.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+#define LNL_FLUSH_WORKQUEUE(wq__) \
+ flush_workqueue(wq__)
+#define LNL_FLUSH_WORK(wrk__) \
+ flush_work(wrk__)
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 09d731a9125c..687f3a9039bb 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -353,15 +353,6 @@ struct xe_device {
struct workqueue_struct *wq;
} sriov;
- /** @clients: drm clients info */
- struct {
- /** @clients.lock: Protects drm clients info */
- spinlock_t lock;
-
- /** @clients.count: number of drm clients */
- u64 count;
- } clients;
-
/** @usm: unified memory state */
struct {
/** @usm.asid: convert a ASID to VM */
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index f23ac1e2ed88..756b492f13b0 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -132,12 +132,16 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;
- if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
- return -EINVAL;
+ if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) {
+ err = -EINVAL;
+ goto err_exec_queue;
+ }
if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
- q->width != args->num_batch_buffer))
- return -EINVAL;
+ q->width != args->num_batch_buffer)) {
+ err = -EINVAL;
+ goto err_exec_queue;
+ }
if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) {
err = -ECANCELED;
@@ -220,6 +224,7 @@ retry:
fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
+ xe_vm_unlock(vm);
goto err_unlock_list;
}
for (i = 0; i < num_syncs; i++)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index d098d2dd1b2d..fd0f3b3c9101 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -260,8 +260,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
{
int i;
+ /*
+ * Before releasing our ref to lrc and xef, accumulate our run ticks
+ */
+ xe_exec_queue_update_run_ticks(q);
+
for (i = 0; i < q->width; ++i)
xe_lrc_put(q->lrc[i]);
+
__xe_exec_queue_free(q);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index d2e4dc3aaf61..ffcbd05671fc 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -68,6 +68,12 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
}
}
+ /*
+ * Mask bits need to be set for the register. Though only Xe2+
+ * platforms require setting of mask bits, it won't harm for older
+ * platforms as these bits are unused there.
+ */
+ mode |= CCS_MODE_CSLICE_0_3_MASK << 16;
xe_mmio_write32(gt, CCS_MODE, mode);
xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
@@ -133,9 +139,10 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
}
/* CCS mode can only be updated when there are no drm clients */
- spin_lock(&xe->clients.lock);
- if (xe->clients.count) {
- spin_unlock(&xe->clients.lock);
+ mutex_lock(&xe->drm.filelist_mutex);
+ if (!list_empty(&xe->drm.filelist)) {
+ mutex_unlock(&xe->drm.filelist_mutex);
+ xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n");
return -EBUSY;
}
@@ -146,7 +153,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
xe_gt_reset_async(gt);
}
- spin_unlock(&xe->clients.lock);
+ mutex_unlock(&xe->drm.filelist_mutex);
return count;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 8250ef71e685..afdb477ecf83 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -387,6 +387,8 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node)
* the xe_ggtt_clear() called by below xe_ggtt_remove_node().
*/
xe_ggtt_node_remove(node, false);
+ } else {
+ xe_ggtt_node_fini(node);
}
}
@@ -442,7 +444,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
config->ggtt_region = node;
return 0;
err:
- xe_ggtt_node_fini(node);
+ pf_release_ggtt(tile, node);
return err;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index bbb9e411d21f..9d82ea30f4df 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -72,6 +72,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
struct xe_device *xe = gt_to_xe(gt);
struct xe_gt_tlb_invalidation_fence *fence, *next;
+ LNL_FLUSH_WORK(&gt->uc.guc.ct.g2h_worker);
+
spin_lock_irq(&gt->tlb_invalidation.pending_lock);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 17986bfd8818..9c505d3517cd 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -897,17 +897,8 @@ retry_same_fence:
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
- /*
- * Occasionally it is seen that the G2H worker starts running after a delay of more than
- * a second even after being queued and activated by the Linux workqueue subsystem. This
- * leads to G2H timeout error. The root cause of issue lies with scheduling latency of
- * Lunarlake Hybrid CPU. Issue dissappears if we disable Lunarlake atom cores from BIOS
- * and this is beyond xe kmd.
- *
- * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
- */
if (!ret) {
- flush_work(&ct->g2h_worker);
+ LNL_FLUSH_WORK(&ct->g2h_worker);
if (g2h_fence.done) {
xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
g2h_fence.seqno, action[0]);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index f903b0772722..4f5d00aea716 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -745,8 +745,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
- xe_exec_queue_update_run_ticks(job->q);
-
trace_xe_sched_job_free(job);
xe_sched_job_put(job);
}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index f5deb81eba01..5b4264ea38bd 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -155,6 +155,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
}
if (!timeout) {
+ LNL_FLUSH_WORKQUEUE(xe->ordered_wq);
+ err = do_compare(addr, args->value, args->mask,
+ args->op);
+ if (err <= 0) {
+ drm_dbg(&xe->drm, "LNL_FLUSH_WORKQUEUE resolved ufence timeout\n");
+ break;
+ }
err = -ETIME;
break;
}