Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c	232
1 file changed, 153 insertions(+), 79 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 71acd577803e..f9bab963a948 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -60,12 +60,6 @@ static const char * const domain_bit_to_string[] = {
 
 static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
 
-
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
-	return (struct amdgpu_device *)kgd;
-}
-
 static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
 		struct kgd_mem *mem)
 {
@@ -126,8 +120,19 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 		PAGE_ALIGN(size);
 }
 
+/**
+ * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer, including any reserved for control structures
+ *
+ * @adev: Device to which the allocated BO belongs
+ * @size: Size of buffer, in bytes, encapsulated by BO. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ *
+ * Return: -ENOMEM in case of error, ZERO otherwise
+ */
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain, bool sg)
+		uint64_t size, u32 alloc_flag)
 {
 	uint64_t reserved_for_pt =
 		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
@@ -137,20 +142,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	acc_size = amdgpu_amdkfd_acc_size(size);
 
 	vram_needed = 0;
-	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		/* TTM GTT memory */
+	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size + size;
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
-		/* Userptr */
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
+		vram_needed = size;
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
-	} else {
-		/* VRAM and SG */
+	} else if (alloc_flag &
+		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
-			vram_needed = size;
+	} else {
+		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+		return -ENOMEM;
 	}
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -166,64 +175,72 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (adev->kfd.vram_used + vram_needed >
 	     adev->gmc.real_vram_size - reserved_for_pt)) {
 		ret = -ENOMEM;
-	} else {
-		kfd_mem_limit.system_mem_used += system_mem_needed;
-		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
-		adev->kfd.vram_used += vram_needed;
+		goto release;
 	}
 
+	/* Update memory accounting by decreasing available system
+	 * memory, TTM memory and GPU memory as computed above
+	 */
+	adev->kfd.vram_used += vram_needed;
+	kfd_mem_limit.system_mem_used += system_mem_needed;
+	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
 static void unreserve_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain, bool sg)
+		uint64_t size, u32 alloc_flag)
 {
 	size_t acc_size;
 
 	acc_size = amdgpu_amdkfd_acc_size(size);
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+
+	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+		adev->kfd.vram_used -= size;
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-	} else {
+	} else if (alloc_flag &
+		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-			adev->kfd.vram_used -= size;
-			WARN_ONCE(adev->kfd.vram_used < 0,
-				  "kfd VRAM memory accounting unbalanced");
-		}
+	} else {
+		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+		goto release;
 	}
-	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-		  "kfd system memory accounting unbalanced");
+
+	WARN_ONCE(adev->kfd.vram_used < 0,
+		  "KFD VRAM memory accounting unbalanced");
 	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
-		  "kfd TTM memory accounting unbalanced");
+		  "KFD TTM memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "KFD system memory accounting unbalanced");
 
+release:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u32 domain = bo->preferred_domains;
-	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
-
-	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
-		domain = AMDGPU_GEM_DOMAIN_CPU;
-		sg = false;
-	}
+	u32 alloc_flags = bo->kfd_bo->alloc_flags;
+	u64 size = amdgpu_bo_size(bo);
 
-	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+	unreserve_mem_limit(adev, size, alloc_flags);
 
 	kfree(bo->kfd_bo);
 }
 
-
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
@@ -646,12 +663,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 	if (IS_ERR(gobj))
 		return PTR_ERR(gobj);
 
-	/* Import takes an extra reference on the dmabuf. Drop it now to
-	 * avoid leaking it. We only need the one reference in
-	 * kgd_mem->dmabuf.
-	 */
-	dma_buf_put(mem->dmabuf);
-
 	*bo = gem_to_amdgpu_bo(gobj);
 	(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
 	(*bo)->parent = amdgpu_bo_ref(mem->bo);
@@ -697,10 +708,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			 va + bo_size, vm);
 
-		if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
-					amdgpu_xgmi_same_hive(adev, bo_adev))) {
-			/* Mappings on the local GPU and VRAM mappings in the
-			 * local hive share the original BO
+		if (adev == bo_adev ||
+		   (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
+		   (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) {
+			/* Mappings on the local GPU, or VRAM mappings in the
+			 * local hive, or userptr mappings in IOMMU direct-map
+			 * mode share the original BO
 			 */
 			attachment[i]->type = KFD_MEM_ATT_SHARED;
 			bo[i] = mem->bo;
@@ -1278,12 +1291,60 @@ create_evict_fence_fail:
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using the following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ *   - USERPTR BOs are UNPINNABLE and will return an error
+ *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ *     PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, non-zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
+{
+	int ret = 0;
+
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret))
+		return ret;
+
+	ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+	if (ret)
+		pr_err("Error in Pinning BO to domain: %d\n", domain);
+
+	amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+	amdgpu_bo_unreserve(bo);
+
+	return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins a BO using the following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ *   - Is an illegal request for USERPTR BOs and is ignored
+ *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ *     PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+	int ret = 0;
+
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret))
+		return;
+
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
 					   struct file *filp, u32 pasid,
 					   void **process_info,
 					   struct dma_fence **ef)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_fpriv *drv_priv;
 	struct amdgpu_vm *avm;
 	int ret;
@@ -1359,12 +1420,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 	}
 }
 
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
+					    void *drm_priv)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm;
 
-	if (WARN_ON(!kgd || !drm_priv))
+	if (WARN_ON(!adev || !drm_priv))
 		return;
 
 	avm = drm_priv_to_vm(drm_priv);
@@ -1392,17 +1453,16 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
-		struct kgd_dev *kgd, uint64_t va, uint64_t size,
+		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
 		uint64_t *offset, uint32_t flags)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	enum ttm_bo_type bo_type = ttm_bo_type_device;
 	struct sg_table *sg = NULL;
 	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
-	struct drm_gem_object *gobj;
+	struct drm_gem_object *gobj = NULL;
 	u32 domain, alloc_domain;
 	u64 alloc_flags;
 	int ret;
@@ -1460,7 +1520,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
+	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
 	if (ret) {
 		pr_debug("Insufficient memory\n");
 		goto err_reserve_limit;
@@ -1501,6 +1561,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		ret = init_user_pages(*mem, user_addr);
 		if (ret)
 			goto allocate_init_user_pages_failed;
+	} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+		ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+		if (ret) {
+			pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
+			goto err_pin_bo;
+		}
+		bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+		bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
 	}
 
 	if (offset)
@@ -1509,17 +1578,20 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	return 0;
 
 allocate_init_user_pages_failed:
+err_pin_bo:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
 	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
-	drm_gem_object_put(gobj);
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
 err_bo_create:
-	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+	unreserve_mem_limit(adev, size, flags);
err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
-	kfree(*mem);
+	if (gobj)
+		drm_gem_object_put(gobj);
+	else
+		kfree(*mem);
err:
 	if (sg) {
 		sg_free_table(sg);
@@ -1529,7 +1601,7 @@ err:
 }
 
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
 		uint64_t *size)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
@@ -1542,6 +1614,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	bool is_imported = false;
 
 	mutex_lock(&mem->lock);
+
+	/* Unpin MMIO/DOORBELL BOs that were pinned during allocation */
+	if (mem->alloc_flags &
+	    (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+	     KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+		amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+	}
+
 	mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
 	is_imported = mem->is_imported;
 	mutex_unlock(&mem->lock);
@@ -1621,10 +1701,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem,
+		struct amdgpu_device *adev, struct kgd_mem *mem,
 		void *drm_priv, bool *table_freed)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	int ret;
 	struct amdgpu_bo *bo;
@@ -1751,7 +1830,7 @@ out:
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
+		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	struct amdkfd_process_info *process_info = avm->process_info;
@@ -1812,7 +1891,7 @@ out:
 }
 
 int amdgpu_amdkfd_gpuvm_sync_memory(
-		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
+		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
 {
 	struct amdgpu_sync sync;
 	int ret;
@@ -1828,7 +1907,7 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
 		struct kgd_mem *mem, void **kptr, uint64_t *size)
 {
 	int ret;
@@ -1884,7 +1963,8 @@ bo_reserve_failed:
 	return ret;
 }
 
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem)
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
+						  struct kgd_mem *mem)
 {
 	struct amdgpu_bo *bo = mem->bo;
 
@@ -1894,12 +1974,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kg
 	amdgpu_bo_unreserve(bo);
 }
 
-int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
-					      struct kfd_vm_fault_info *mem)
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+					  struct kfd_vm_fault_info *mem)
 {
-	struct amdgpu_device *adev;
-
-	adev = (struct amdgpu_device *)kgd;
 	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
 		*mem = *adev->gmc.vm_fault_info;
 		mb();
@@ -1908,13 +1985,12 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 	return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 				      struct dma_buf *dma_buf,
 				      uint64_t va, void *drm_priv,
 				      struct kgd_mem **mem, uint64_t *size,
 				      uint64_t *mmap_offset)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *bo;
@@ -2541,11 +2617,9 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
 }
 
 /* Returns GPU-specific tiling mode information */
-int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
 				struct tile_config *config)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
 	config->gb_addr_config = adev->gfx.config.gb_addr_config;
 	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
 	config->num_tile_configs =
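Taken together, the changes above alter the lifecycle of MMIO/DOORBELL allocations: amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu() now reserves the memory limit keyed on the KFD allocation flags and pins such BOs in GTT, and amdgpu_amdkfd_gpuvm_free_memory_of_gpu() unpins them before the accounting is unwound in amdgpu_amdkfd_release_notify(). The sketch below shows that pairing from a caller's perspective; it is not part of the patch, and the helper name kfd_doorbell_map_example() plus the assumption that adev, drm_priv and va come from an already-acquired process VM are illustrative only.

/* Illustrative sketch only (not from the patch): exercises the new
 * alloc-flag-driven accounting and the pin/unpin pairing for a DOORBELL
 * BO. Assumes adev, drm_priv and va come from a process VM set up via
 * amdgpu_amdkfd_gpuvm_acquire_process_vm(); error handling is minimal.
 */
static int kfd_doorbell_map_example(struct amdgpu_device *adev,
				    void *drm_priv, uint64_t va,
				    uint64_t *doorbell_offset)
{
	struct kgd_mem *mem;
	uint64_t size = PAGE_SIZE;
	bool table_freed;
	int ret;

	/* The DOORBELL flag takes the acc_size-only accounting branch in
	 * amdgpu_amdkfd_reserve_mem_limit() and pins the BO in GTT via
	 * amdgpu_amdkfd_gpuvm_pin_bo().
	 */
	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(adev, va, size,
			drm_priv, &mem, doorbell_offset,
			KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL);
	if (ret)
		return ret;

	ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(adev, mem, drm_priv,
						    &table_freed);
	if (ret)
		goto free_mem;

	/* ... doorbell BO stays pinned in GTT while in use ... */

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(adev, mem, drm_priv);
free_mem:
	/* Free path unpins the DOORBELL BO before the limit is unreserved */
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(adev, mem, drm_priv, &size);
	return ret;
}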
