diff options
author | David Francis <David.Francis@amd.com> | 2023-07-21 11:14:24 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-09-20 16:24:06 -0400 |
commit | 5f248462c69d5ea7b54a88c62edb73f626d1528d (patch) | |
tree | 8840e1bb0fcb471cf1c480513b3390c8e1b1b428 /drivers | |
parent | d92e55565c53eff6d0c549090487d0a1ef704e1c (diff) |
drm/amdgpu: Add EXT_COHERENT memory allocation flags
These flags (for GEM and SVM allocations) allocate
memory that allows for system-scope atomic semantics.
On GFX943 these flags cause caches to be avoided on
non-local memory.
On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.
Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88
Reviewed-by: David Yat Sin <David.YatSin@amd.com>
Signed-off-by: David Francis <David.Francis@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 |
6 files changed, 18 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 57189b8bc214..c48ca5adad8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1691,6 +1691,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT; if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 12210598e5b8..76b618735dc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d3da13f4c80e..e582073b57c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -635,6 +635,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e1f47f9c1881..69f65e9c4f93 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -543,6 +543,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 268ee533e7c1..2936a0fb7527 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1187,7 +1187,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; - bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; + bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT); + bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; @@ -1257,6 +1258,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, snoop = true; if (uncached) { mtype = MTYPE_UC; + } else if (ext_coherent) { + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ae5660861133..9a1952114e5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1189,7 +1189,8 @@ svm_range_get_pte_flags(struct kfd_node *node, uint32_t mapping_flags = 0; uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); - bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); + bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT; bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/ unsigned int mtype_local; @@ -1237,6 +1238,13 @@ svm_range_get_pte_flags(struct kfd_node *node, snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; + } else if (ext_coherent) { + /* local HBM region close to partition */ + if (bo_node->adev == node->adev && + (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id)) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { /* local HBM region close to partition */ if (bo_node->adev == node->adev && |