From 7ab9ebc580617831355843f19224f1e31bb8e983 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 12 Sep 2024 22:38:12 +0200 Subject: drm/xe/guc: Fix GUC_{SUBMIT,FIRMWARE}_VER helper macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those macros rely on non-existing MAKE_VER_STRUCT macro, while the correct one that should be used is named MAKE_GUC_VER_STRUCT. Fixes: 4eb0aab6e443 ("drm/xe/guc: Bump minimum required GuC version to v70.29.2") Signed-off-by: Michal Wajdeczko Cc: Julia Filipchuk Cc: John Harrison Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20240912203817.1880-2-michal.wajdeczko@intel.com (cherry picked from commit 02fdf821ed79f59c40d766a85947aa7cc25d4364) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index c3e6b51f7a09..42116b167c98 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -18,8 +18,10 @@ */ #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) -#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) -#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) +#define GUC_SUBMIT_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) struct drm_printer; -- cgit v1.2.3-70-g09d2 From ee06c09ded3c2f722be4e240ed06287e23596bda Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 16 Sep 2024 09:49:12 +0100 Subject: drm/xe/vram: fix ccs offset calculation Spec says SW is expected to round up to the nearest 128K, if not already aligned for the CC unit view of CCS. We are seeing the assert sometimes pop on BMG to tell us that there is a hole between GSM and CCS, as well as popping other asserts with having a vram size with strange alignment, which is likely caused by misaligned offset here. v2 (Shuicheng): - Do the round_up() on final SW address. BSpec: 68023 Fixes: b5c2ca0372dc ("drm/xe/xe2hpg: Determine flat ccs offset for vram") Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Akshata Jahagirdar Cc: Lucas De Marchi Cc: Shuicheng Lin Cc: Matt Roper Cc: stable@vger.kernel.org # v6.10+ Reviewed-by: Himal Prasad Ghimiray Tested-by: Shuicheng Lin Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240916084911.13119-2-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 37173392741c425191b959acb3adf70c9a4610c0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vram.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 5bcd59190353..80ba2fc78837 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -182,6 +182,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = offset_hi << 32; /* HW view bits 39:32 */ offset |= offset_lo << 6; /* HW view bits 31:6 */ offset *= num_enabled; /* convert to SW view */ + offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */ /* We don't expect any holes */ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), -- cgit v1.2.3-70-g09d2 From 99b1f7493bfa757b03d41ee6d7f7d00f81fcba5d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:27 +0100 Subject: drm/xe/client: fix deadlock in show_meminfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a real deadlock as well as sleeping in atomic() bug in here, if the bo put happens to be the last ref, since bo destruction wants to grab the same spinlock and sleeping locks. Fix that by dropping the ref using xe_bo_put_deferred(), and moving the final commit outside of the lock. Dropping the lock around the put is tricky since the bo can go out of scope and delete itself from the list, making it difficult to navigate to the next list entry. Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2727 Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-5-matthew.auld@intel.com (cherry picked from commit 0083b8e6f11d7662283a267d4ce7c966812ffd8a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index e64f4b645e2e..badfa045ead8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -196,6 +196,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type; @@ -215,11 +216,14 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; + bo_meminfo(bo, stats); - xe_bo_put(bo); + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); + xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue; -- cgit v1.2.3-70-g09d2 From 69bbe3adf36de47315498c9384f99a1ff9171694 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:28 +0100 Subject: drm/xe/client: add missing bo locking in show_meminfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo_meminfo() wants to inspect bo state like tt and the ttm resource, however this state can change at any point leading to stuff like NPD and UAF, if the bo lock is not held. Grab the bo lock when calling bo_meminfo(), ensuring we drop any spinlocks first. In the case of object_idr we now also need to hold a ref. v2 (MattB) - Also add xe_bo_assert_held() Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-6-matthew.auld@intel.com (cherry picked from commit 4f63d712fa104c3ebefcb289d1e733e86d8698c7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 39 +++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index badfa045ead8..95a05c5bc897 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -10,6 +10,7 @@ #include #include +#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client; + xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -166,6 +170,8 @@ static void bo_meminfo(struct xe_bo *bo, u64 sz = bo->size; u32 mem_type; + xe_bo_assert_held(bo); + if (bo->placement.placement) mem_type = bo->placement.placement->mem_type; else @@ -207,7 +213,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj); - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock); @@ -217,7 +236,21 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); -- cgit v1.2.3-70-g09d2 From 73d10c7788f6d2b7badf9973afbdea7ca433c15d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:29 +0100 Subject: drm/xe/client: use mem_type from the current resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather extract the mem_type from the current resource. Checking the first potential placement doesn't really tell us where the bo is currently allocated, especially if there are multiple potential placements. Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-7-matthew.auld@intel.com (cherry picked from commit fbd73b7d2ae29ef0f604f376bcc22b886a49329e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 95a05c5bc897..c4add8b38bbd 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -168,15 +168,10 @@ static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { u64 sz = bo->size; - u32 mem_type; + u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); - if (bo->placement.placement) - mem_type = bo->placement.placement->mem_type; - else - mem_type = XE_PL_TT; - if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) stats[mem_type].shared += sz; else -- cgit v1.2.3-70-g09d2 From ddc73c465628ab3e60f7eb5b4063b644c18b6336 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:30 +0100 Subject: drm/xe/bo: add some annotations in bo_put() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the put() triggers bo destroy then there is at least one potential sleeping lock. Also annotate bos_lock and ggtt lock. Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-8-matthew.auld@intel.com (cherry picked from commit 3b04c2cfd71c54117237c72f2a08ff0ae1f602e2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 6 +----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 06911e9a3bf5..f379df3a12bf 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2320,6 +2320,20 @@ void xe_bo_put_commit(struct llist_head *deferred) drm_gem_object_free(&bo->ttm.base.refcount); } +void xe_bo_put(struct xe_bo *bo) +{ + might_sleep(); + if (bo) { +#ifdef CONFIG_PROC_FS + if (bo->client) + might_lock(&bo->client->bos_lock); +#endif + if (bo->ggtt_node && bo->ggtt_node->ggtt) + might_lock(&bo->ggtt_node->ggtt->lock); + drm_gem_object_put(&bo->ttm.base); + } +} + /** * xe_bo_dumb_create - Create a dumb bo as backing for a fb * @file_priv: ... diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index dbfb3209615d..6e4be52306df 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -126,11 +126,7 @@ static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) return bo; } -static inline void xe_bo_put(struct xe_bo *bo) -{ - if (bo) - drm_gem_object_put(&bo->ttm.base); -} +void xe_bo_put(struct xe_bo *bo); static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) { -- cgit v1.2.3-70-g09d2 From 9460f4bd5970f2e46fe190a0cb9814697bd7f21a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 10 Sep 2024 18:18:20 -0700 Subject: drm/xe: Do not run GPU page fault handler on a closed VM Closing a VM removes page table memory thus we shouldn't touch page tables when a VM is closed. Do not run the GPU page fault handler once the VM is closed to avoid touching page tables. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240911011820.825127-1-matthew.brost@intel.com (cherry picked from commit f96dbf7c321d70834d46f3aedb75a671e839b51e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 730eec07795e..00af059a8971 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -212,6 +212,12 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) * TODO: Change to read lock? Using write lock for simplicity. */ down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + vma = lookup_vma(vm, pf->page_addr); if (!vma) { err = -EINVAL; -- cgit v1.2.3-70-g09d2 From 6c10ba06bb1b48acce6d4d9c1e33beb9954f1788 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 12 Sep 2024 08:38:42 -0700 Subject: drm/xe/oa: Fix overflow in oa batch buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default xe_bb_create_job() appends a MI_BATCH_BUFFER_END to batch buffer, this is not a problem if batch buffer is only used once but oa reuses the batch buffer for the same metric and at each call it appends a MI_BATCH_BUFFER_END, printing the warning below and then overflowing. [ 381.072016] ------------[ cut here ]------------ [ 381.072019] xe 0000:00:02.0: [drm] Assertion `bb->len * 4 + bb_prefetch(q->gt) <= size` failed! platform: LUNARLAKE subplatform: 1 graphics: Xe2_LPG / Xe2_HPG 20.04 step B0 media: Xe2_LPM / Xe2_HPM 20.00 step B0 tile: 0 VRAM 0 B GT: 0 type 1 So here checking if batch buffer already have MI_BATCH_BUFFER_END if not append it. v2: - simply fix, suggestion from Ashutosh Cc: Ashutosh Dixit Signed-off-by: José Roberto de Souza Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240912153842.35813-1-jose.souza@intel.com (cherry picked from commit 9ba0e0f30ca42a98af3689460063edfb6315718a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a13e0b3a169e..ef777dbdf4ec 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -65,7 +65,8 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo); - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END) + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); -- cgit v1.2.3-70-g09d2