From 676deb38770582abac87447f47d1ee643bb14681 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Thu, 22 Oct 2020 17:44:55 +0800 Subject: drm/amdgpu: fix the issue of reserving bad pages failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In amdgpu_ras_reset_gpu, because bad pages may not be freed, it has high probability to reserve bad pages failed. Change to reserve bad pages when freeing VRAM. v2: 1. avoid allocating the drm_mm node outside of amdgpu_vram_mgr.c 2. move bad page reserving into amdgpu_ras_add_bad_pages, if vram mgr reserve bad page failed, it will put it into pending list, otherwise put it into processed list; 3. remove amdgpu_ras_release_bad_pages, because retired page's info has been moved into amdgpu_vram_mgr v3: 1. formate code style; 2. rename amdgpu_vram_reserve_scope as amdgpu_vram_reservation; 3. rename scope_pending as reservations_pending; 4. rename scope_processed as reserved_pages; 5. change to iterate over all the pending ones and try to insert them with drm_mm_reserve_node(); v4: 1. rename amdgpu_vram_mgr_reserve_scope as amdgpu_vram_mgr_reserve_range; 2. remove unused include "amdgpu_ras.h"; 3. rename amdgpu_vram_mgr_check_and_reserve as amdgpu_vram_mgr_do_reserve; 4. refine amdgpu_vram_mgr_reserve_range to call amdgpu_vram_mgr_do_reserve. Reviewed-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Dennis Li Signed-off-by: Wenhui Sheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 106 +++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a3dd909f78ab..10126a6aa603 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -187,6 +187,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) drm_mm_init(&mgr->mm, 0, man->size); spin_lock_init(&mgr->lock); + INIT_LIST_HEAD(&mgr->reservations_pending); + INIT_LIST_HEAD(&mgr->reserved_pages); /* Add the two VRAM-related sysfs files */ ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); @@ -211,6 +213,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; int ret; + struct amdgpu_vram_reservation *rsv, *temp; ttm_resource_manager_set_used(man, false); @@ -219,6 +222,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) return; spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + kfree(rsv); + + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); + kfree(rsv); + } drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); @@ -277,6 +287,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) return usage; } +static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) +{ + struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct drm_mm *mm = &mgr->mm; + struct amdgpu_vram_reservation *rsv, *temp; + uint64_t vis_usage; + + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { + if (drm_mm_reserve_node(mm, &rsv->mm_node)) + continue; + + dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Successed\n", + rsv->mm_node.start, rsv->mm_node.size); + + vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); + atomic64_add(vis_usage, &mgr->vis_usage); + atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage); + list_move(&rsv->node, &mgr->reserved_pages); + } +} + +/** + * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM + * + * @man: TTM memory type manager + * @start: start address of the range in VRAM + * @size: size of the range + * + * Reserve memory from start addess with the specified size in VRAM + */ +int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, + uint64_t start, uint64_t size) +{ + struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct amdgpu_vram_reservation *rsv; + + rsv = kzalloc(sizeof(*rsv), GFP_KERNEL); + if (!rsv) + return -ENOMEM; + + INIT_LIST_HEAD(&rsv->node); + rsv->mm_node.start = start >> PAGE_SHIFT; + rsv->mm_node.size = size >> PAGE_SHIFT; + + spin_lock(&mgr->lock); + list_add_tail(&mgr->reservations_pending, &rsv->node); + amdgpu_vram_mgr_do_reserve(man); + spin_unlock(&mgr->lock); + + return 0; +} + +/** + * amdgpu_vram_mgr_query_page_status - query the reservation status + * + * @man: TTM memory type manager + * @start: start address of a page in VRAM + * + * Returns: + * -EBUSY: the page is still hold and in pending list + * 0: the page has been reserved + * -ENOENT: the input page is not a reservation + */ +int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, + uint64_t start) +{ + struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct amdgpu_vram_reservation *rsv; + int ret; + + spin_lock(&mgr->lock); + + list_for_each_entry(rsv, &mgr->reservations_pending, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { + ret = -EBUSY; + goto out; + } + } + + list_for_each_entry(rsv, &mgr->reserved_pages, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { + ret = 0; + goto out; + } + } + + ret = -ENOENT; +out: + spin_unlock(&mgr->lock); + return ret; +} + /** * amdgpu_vram_mgr_virt_start - update virtual start address * @@ -447,6 +552,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); ++nodes; } + amdgpu_vram_mgr_do_reserve(man); spin_unlock(&mgr->lock); atomic64_sub(usage, &mgr->usage); -- cgit v1.2.3-70-g09d2