Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c	326
1 file changed, 220 insertions, 106 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2cd081cbf706..0534ab716809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -38,7 +38,6 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
-#include <linux/swiotlb.h>
 #include <linux/dma-buf.h>
 #include <linux/sizes.h>
 #include <linux/module.h>
@@ -65,7 +64,7 @@
 
 MODULE_IMPORT_NS(DMA_BUF);
 
-#define AMDGPU_TTM_VRAM_MAX_DW_READ	(size_t)128
+#define AMDGPU_TTM_VRAM_MAX_DW_READ	((size_t)128)
 
 static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
 				   struct ttm_tt *ttm,
@@ -184,11 +183,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 				 struct ttm_resource *mem,
 				 struct amdgpu_res_cursor *mm_cur,
-				 unsigned window, struct amdgpu_ring *ring,
+				 unsigned int window, struct amdgpu_ring *ring,
 				 bool tmz, uint64_t *size, uint64_t *addr)
 {
 	struct amdgpu_device *adev = ring->adev;
-	unsigned offset, num_pages, num_dw, num_bytes;
+	unsigned int offset, num_pages, num_dw, num_bytes;
 	uint64_t src_addr, dst_addr;
 	struct amdgpu_job *job;
 	void *cpu_addr;
@@ -229,7 +228,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
-	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
 				     AMDGPU_FENCE_OWNER_UNDEFINED,
 				     num_dw * 4 + num_bytes,
 				     AMDGPU_IB_POOL_DELAYED, &job);
@@ -384,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 		struct dma_fence *wipe_fence = NULL;
 
-		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
+		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
+					false);
 		if (r) {
 			goto error;
 		} else if (wipe_fence) {
@@ -631,6 +631,7 @@ struct amdgpu_ttm_tt {
 	struct task_struct	*usertask;
 	uint32_t		userflags;
 	bool			bound;
+	int32_t			pool_id;
 };
 
 #define ttm_to_amdgpu_ttm_tt(ptr)	container_of(ptr, struct amdgpu_ttm_tt, ttm)
@@ -800,6 +801,44 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
 	sg_free_table(ttm->sg);
 }
 
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+				struct ttm_tt *ttm, uint64_t flags)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	uint64_t total_pages = ttm->num_pages;
+	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+	uint64_t page_idx, pages_per_xcc;
+	int i;
+	uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+			AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+
+	pages_per_xcc = total_pages;
+	do_div(pages_per_xcc, num_xcc);
+
+	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+		/* MQD page: use default flags */
+		amdgpu_gart_bind(adev,
+				gtt->offset + (page_idx << PAGE_SHIFT),
+				1, &gtt->ttm.dma_address[page_idx], flags);
+		/*
+		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+		 * the second page of the BO onward.
+		 */
+		amdgpu_gart_bind(adev,
+				gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+				pages_per_xcc - 1,
+				&gtt->ttm.dma_address[page_idx + 1],
+				ctrl_flags);
+	}
+}
+
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 				 struct ttm_buffer_object *tbo,
 				 uint64_t flags)
@@ -812,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 		flags |= AMDGPU_PTE_TMZ;
 
 	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
-		uint64_t page_idx = 1;
-
-		amdgpu_gart_bind(adev, gtt->offset, page_idx,
-				 gtt->ttm.dma_address, flags);
-
-		/* The memory type of the first page defaults to UC. Now
-		 * modify the memory type to NC from the second page of
-		 * the BO onward.
-		 */
-		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
-		amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
-				 ttm->num_pages - page_idx,
-				 &(gtt->ttm.dma_address[page_idx]), flags);
+		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
 	} else {
 		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 				 gtt->ttm.dma_address, flags);
@@ -1029,15 +1054,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 					   uint32_t page_flags)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_ttm_tt *gtt;
 	enum ttm_caching caching;
 
 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
-	if (gtt == NULL) {
+	if (!gtt)
 		return NULL;
-	}
+
 	gtt->gobj = &bo->base;
+	if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+		gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+	else
+		gtt->pool_id = abo->xcp_id;
 
 	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 		caching = ttm_write_combined;
@@ -1064,6 +1094,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+	struct ttm_pool *pool;
 	pgoff_t i;
 	int ret;
 
@@ -1078,7 +1109,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
 	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
 		return 0;
 
-	ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+		pool = &adev->mman.ttm_pools[gtt->pool_id];
+	else
+		pool = &adev->mman.bdev.pool;
+	ret = ttm_pool_alloc(pool, ttm, ctx);
 	if (ret)
 		return ret;
 
@@ -1099,6 +1134,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
 {
 	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 	struct amdgpu_device *adev;
+	struct ttm_pool *pool;
 	pgoff_t i;
 
 	amdgpu_ttm_backend_unbind(bdev, ttm);
@@ -1117,7 +1153,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
 		ttm->pages[i]->mapping = NULL;
 
 	adev = amdgpu_ttm_adev(bdev);
-	return ttm_pool_free(&adev->mman.bdev.pool, ttm);
+
+	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+		pool = &adev->mman.ttm_pools[gtt->pool_id];
+	else
+		pool = &adev->mman.bdev.pool;
+
+	return ttm_pool_free(pool, ttm);
 }
 
 /**
@@ -1414,7 +1456,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 		memcpy(adev->mman.sdma_access_ptr, buf, len);
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
 				     AMDGPU_FENCE_OWNER_UNDEFINED,
 				     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
 				     &job);
@@ -1623,14 +1665,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+						uint32_t reserve_size)
 {
 	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
 
 	memset(ctx, 0, sizeof(*ctx));
 
 	ctx->c2p_train_data_offset =
-		ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+		ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
 	ctx->p2c_train_data_offset =
 		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
 	ctx->train_data_size =
@@ -1648,11 +1691,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
  */
 static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 {
-	int ret;
 	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
 	bool mem_train_support = false;
+	uint32_t reserve_size = 0;
+	int ret;
 
-	if (!amdgpu_sriov_vf(adev)) {
+	if (adev->bios && !amdgpu_sriov_vf(adev)) {
 		if (amdgpu_atomfirmware_mem_training_supported(adev))
 			mem_train_support = true;
 		else
@@ -1666,14 +1710,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 	 * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
 	 * discovery data and G6 memory training data respectively
 	 */
-	adev->mman.discovery_tmr_size =
-		amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
-	if (!adev->mman.discovery_tmr_size)
-		adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+	if (adev->bios)
+		reserve_size =
+			amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+	if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+		reserve_size = max(reserve_size, (uint32_t)280 << 20);
+	else if (!reserve_size)
+		reserve_size = DISCOVERY_TMR_OFFSET;
 
 	if (mem_train_support) {
 		/* reserve vram for mem train according to TMR location */
-		amdgpu_ttm_training_data_block_init(adev);
+		amdgpu_ttm_training_data_block_init(adev, reserve_size);
 		ret = amdgpu_bo_create_kernel_at(adev,
 						 ctx->c2p_train_data_offset,
 						 ctx->train_data_size,
@@ -1687,20 +1735,58 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 		ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
 	}
 
-	ret = amdgpu_bo_create_kernel_at(adev,
-					 adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
-					 adev->mman.discovery_tmr_size,
-					 &adev->mman.discovery_memory,
-					 NULL);
-	if (ret) {
-		DRM_ERROR("alloc tmr failed(%d)!\n", ret);
-		amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
-		return ret;
+	if (!adev->gmc.is_app_apu) {
+		ret = amdgpu_bo_create_kernel_at(
+			adev, adev->gmc.real_vram_size - reserve_size,
+			reserve_size, &adev->mman.fw_reserved_memory, NULL);
+		if (ret) {
+			DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+			amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
+					      NULL, NULL);
+			return ret;
+		}
+	} else {
+		DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
 	}
 
 	return 0;
 }
 
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+	int i;
+
+	if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+		return 0;
+
+	adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+				       sizeof(*adev->mman.ttm_pools),
+				       GFP_KERNEL);
+	if (!adev->mman.ttm_pools)
+		return -ENOMEM;
+
+	for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+		ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+			      adev->gmc.mem_partitions[i].numa.node,
+			      false, false);
+	}
+	return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+	int i;
+
+	if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+		return;
+
+	for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+		ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+	kfree(adev->mman.ttm_pools);
+	adev->mman.ttm_pools = NULL;
+}
+
 /*
  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
  * gtt/vram related fields.
@@ -1727,6 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
 	}
+
+	r = amdgpu_ttm_pools_init(adev);
+	if (r) {
+		DRM_ERROR("failed to init ttm pools(%d).\n", r);
+		return r;
+	}
 	adev->mman.initialized = true;
 
 	/* Initialize VRAM pool with all of VRAM divided into pages */
@@ -1744,6 +1836,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
 				adev->gmc.visible_vram_size);
 
+	else if (adev->gmc.is_app_apu)
+		DRM_DEBUG_DRIVER(
+			"No need to ioremap when real vram size is 0\n");
 	else
 #endif
 		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
@@ -1755,9 +1850,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	 *place on the VRAM, so reserve it early.
 	 */
 	r = amdgpu_ttm_fw_reserve_vram_init(adev);
-	if (r) {
+	if (r)
 		return r;
-	}
 
 	/*
 	 *The reserved vram for driver must be pinned to the specified
@@ -1781,49 +1875,46 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* allocate memory as required for VGA
 	 * This is used for VGA emulation and pre-OS scanout buffers to
 	 * avoid display artifacts while transitioning between pre-OS
-	 * and driver.  */
-	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
-				       &adev->mman.stolen_vga_memory,
-				       NULL);
-	if (r)
-		return r;
-	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
-				       adev->mman.stolen_extended_size,
-				       &adev->mman.stolen_extended_memory,
-				       NULL);
-	if (r)
-		return r;
-	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
-				       adev->mman.stolen_reserved_size,
-				       &adev->mman.stolen_reserved_memory,
-				       NULL);
-	if (r)
-		return r;
+	 * and driver.
+	 */
+	if (!adev->gmc.is_app_apu) {
+		r = amdgpu_bo_create_kernel_at(adev, 0,
+					       adev->mman.stolen_vga_size,
+					       &adev->mman.stolen_vga_memory,
+					       NULL);
+		if (r)
+			return r;
 
-	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
-		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
+		r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+					       adev->mman.stolen_extended_size,
+					       &adev->mman.stolen_extended_memory,
+					       NULL);
 
-	/* Compute GTT size, either based on 1/2 the size of RAM size
-	 * or whatever the user passed on module init */
-	if (amdgpu_gtt_size == -1) {
-		struct sysinfo si;
+		if (r)
+			return r;
 
-		si_meminfo(&si);
-		/* Certain GL unit tests for large textures can cause problems
-		 * with the OOM killer since there is no way to link this memory
-		 * to a process.  This was originally mitigated (but not necessarily
-		 * eliminated) by limiting the GTT size.  The problem is this limit
-		 * is often too low for many modern games so just make the limit 1/2
-		 * of system memory which aligns with TTM. The OOM accounting needs
-		 * to be addressed, but we shouldn't prevent common 3D applications
-		 * from being usable just to potentially mitigate that corner case.
-		 */
-		gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-			       (u64)si.totalram * si.mem_unit / 2);
+		r = amdgpu_bo_create_kernel_at(adev,
+					       adev->mman.stolen_reserved_offset,
+					       adev->mman.stolen_reserved_size,
+					       &adev->mman.stolen_reserved_memory,
+					       NULL);
+		if (r)
+			return r;
 	} else {
-		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+		DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
 	}
 
+	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
+		 (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
+
+	/* Compute GTT size, either based on TTM limit
+	 * or whatever the user passed on module init.
+	 */
+	if (amdgpu_gtt_size == -1)
+		gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+	else
+		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
 	/* Initialize GTT memory pool */
 	r = amdgpu_gtt_mgr_init(adev, gtt_size);
 	if (r) {
@@ -1831,7 +1922,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
-		 (unsigned)(gtt_size / (1024 * 1024)));
+		 (unsigned int)(gtt_size / (1024 * 1024)));
 
 	/* Initialize preemptible memory pool */
 	r = amdgpu_preempt_mgr_init(adev);
@@ -1858,7 +1949,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		DRM_ERROR("Failed initializing oa heap.\n");
 		return r;
 	}
-
 	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
 				AMDGPU_GEM_DOMAIN_GTT,
 				&adev->mman.sdma_access_bo, NULL,
@@ -1874,18 +1964,24 @@
 void amdgpu_ttm_fini(struct amdgpu_device *adev)
 {
 	int idx;
+
 	if (!adev->mman.initialized)
 		return;
 
+	amdgpu_ttm_pools_fini(adev);
+
 	amdgpu_ttm_training_reserve_vram_fini(adev);
 	/* return the stolen vga memory back to VRAM */
-	amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
-	/* return the IP Discovery TMR memory back to VRAM */
-	amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
-	if (adev->mman.stolen_reserved_size)
-		amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
-				      NULL, NULL);
+	if (!adev->gmc.is_app_apu) {
+		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+		amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+		/* return the FW reserved memory back to VRAM */
+		amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+				      NULL);
+		if (adev->mman.stolen_reserved_size)
+			amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
					      NULL, NULL);
+	}
 	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
 					&adev->mman.sdma_access_ptr);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
@@ -1927,7 +2023,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 	int r;
 
 	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
-	    adev->mman.buffer_funcs_enabled == enable)
+	    adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
 		return;
 
 	if (enable) {
@@ -1936,7 +2032,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 		ring = adev->mman.buffer_funcs_ring;
 		sched = &ring->sched;
 
-		r = drm_sched_entity_init(&adev->mman.entity,
+		r = drm_sched_entity_init(&adev->mman.high_pr,
 					  DRM_SCHED_PRIORITY_KERNEL, &sched,
 					  1, NULL);
 		if (r) {
@@ -1944,8 +2040,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 				  r);
 			return;
 		}
+
+		r = drm_sched_entity_init(&adev->mman.low_pr,
+					  DRM_SCHED_PRIORITY_NORMAL, &sched,
+					  1, NULL);
+		if (r) {
+			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+				  r);
+			goto error_free_entity;
+		}
 	} else {
-		drm_sched_entity_destroy(&adev->mman.entity);
+		drm_sched_entity_destroy(&adev->mman.high_pr);
+		drm_sched_entity_destroy(&adev->mman.low_pr);
 		dma_fence_put(man->move);
 		man->move = NULL;
 	}
@@ -1957,6 +2063,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 		size = adev->gmc.visible_vram_size;
 	man->size = size;
 	adev->mman.buffer_funcs_enabled = enable;
+
+	return;
+
+error_free_entity:
+	drm_sched_entity_destroy(&adev->mman.high_pr);
 }
 
 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -1964,14 +2075,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 				  unsigned int num_dw,
 				  struct dma_resv *resv,
 				  bool vm_needs_flush,
-				  struct amdgpu_job **job)
+				  struct amdgpu_job **job,
+				  bool delayed)
 {
 	enum amdgpu_ib_pool_type pool = direct_submit ?
 		AMDGPU_IB_POOL_DIRECT :
 		AMDGPU_IB_POOL_DELAYED;
 	int r;
-
-	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+	struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+						    &adev->mman.high_pr;
+	r = amdgpu_job_alloc_with_ib(adev, entity,
 				     AMDGPU_FENCE_OWNER_UNDEFINED,
 				     num_dw * 4, pool, job);
 	if (r)
@@ -1997,10 +2110,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       bool vm_needs_flush, bool tmz)
 {
 	struct amdgpu_device *adev = ring->adev;
-	unsigned num_loops, num_dw;
+	unsigned int num_loops, num_dw;
 	struct amdgpu_job *job;
 	uint32_t max_bytes;
-	unsigned i;
+	unsigned int i;
 	int r;
 
 	if (!direct_submit && !ring->sched.ready) {
@@ -2012,7 +2125,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
 	r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
-				   resv, vm_needs_flush, &job);
+				   resv, vm_needs_flush, &job, false);
 	if (r)
 		return r;
 
@@ -2048,7 +2161,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 			       uint64_t dst_addr, uint32_t byte_count,
 			       struct dma_resv *resv,
 			       struct dma_fence **fence,
-			       bool vm_needs_flush)
+			       bool vm_needs_flush, bool delayed)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned int num_loops, num_dw;
@@ -2061,7 +2174,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
 	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
 	r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
-				   &job);
+				   &job, delayed);
 	if (r)
 		return r;
 
@@ -2084,7 +2197,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
-			struct dma_fence **f)
+			struct dma_fence **f,
+			bool delayed)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2113,7 +2227,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			goto error;
 
 		r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
-					&next, true);
+					&next, true, delayed);
 		if (r)
 			goto error;
 
@@ -2164,7 +2278,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
 
 static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+	struct amdgpu_device *adev = m->private;
 
 	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
 }
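
For reference, the page-range arithmetic behind the new amdgpu_ttm_gart_bind_gfx9_mqd() can be sketched as a standalone userspace C program. This is only an illustration of the MQD+CtrlStack-per-XCC layout described in the comment the patch adds, not kernel code, and the sample values (32 pages, 4 XCCs) are made up:

/*
 * Sketch of the per-XCC split: the BO is laid out as
 * MQD0+CtrlStack0 | MQD1+CtrlStack1 | ... ; each slice's first page
 * keeps the default (UC) mapping, the remaining pages get NC.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t total_pages = 32;	/* hypothetical ttm->num_pages */
	int num_xcc = 4;		/* hypothetical XCCs per partition */
	uint64_t pages_per_xcc = total_pages / num_xcc;
	uint64_t page_idx;
	int i;

	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
		printf("XCC %d: MQD page %llu (default/UC), ctrl-stack pages %llu..%llu (NC)\n",
		       i,
		       (unsigned long long)page_idx,
		       (unsigned long long)(page_idx + 1),
		       (unsigned long long)(page_idx + pages_per_xcc - 1));
	}
	return 0;
}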
