summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c400
1 files changed, 226 insertions, 174 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e2eec7b66334..ae6ab93c868b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -35,191 +35,106 @@
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
+#include "amdgpu_xgmi.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/pm_runtime.h>
/**
- * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
- * @obj: GEM BO
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
*
- * Sets up an in-kernel virtual mapping of the BO's memory.
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
*
- * Returns:
- * The virtual address of the mapping or an error pointer.
+ * Add the attachment as user to the exported DMA-buf.
*/
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
{
+ struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- int ret;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
- ret = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages,
- &bo->dma_buf_vmap);
- if (ret)
- return ERR_PTR(ret);
+ if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
+ attach->peer2peer = false;
- return bo->dma_buf_vmap.virtual;
-}
+ if (attach->dev->driver == adev->dev->driver)
+ return 0;
-/**
- * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
- * @obj: GEM BO
- * @vaddr: Virtual address (unused)
- *
- * Tears down the in-kernel virtual mapping of the BO's memory.
- */
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0)
+ goto out;
+
+ return 0;
- ttm_bo_kunmap(&bo->dma_buf_vmap);
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
}
/**
- * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
- * @obj: GEM BO
- * @vma: Virtual memory area
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
*
- * Sets up a userspace mapping of the BO's memory in the given
- * virtual memory area.
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
*
- * Returns:
- * 0 on success or a negative error code on failure.
+ * Called when an attachment is removed from the DMA-buf.
*/
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
- struct vm_area_struct *vma)
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
{
+ struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- unsigned asize = amdgpu_bo_size(bo);
- int ret;
-
- if (!vma->vm_file)
- return -ENODEV;
- if (adev == NULL)
- return -ENODEV;
-
- /* Check for valid size. */
- if (asize < vma->vm_end - vma->vm_start)
- return -EINVAL;
-
- if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
- (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
- return -EPERM;
- }
- vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
-
- /* prime mmap does not need to check access, so allow here */
- ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
- if (ret)
- return ret;
-
- ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
- drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
-
- return ret;
-}
-
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
- struct dma_fence **fences;
- unsigned int count;
- int r;
-
- if (!dma_resv_get_list(obj)) /* no shared fences to convert */
- return 0;
-
- r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences);
- if (r)
- return r;
-
- if (count == 0) {
- /* Now that was unexpected. */
- } else if (count == 1) {
- dma_resv_add_excl_fence(obj, fences[0]);
- dma_fence_put(fences[0]);
- kfree(fences);
- } else {
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences,
- dma_fence_context_alloc(1), 0,
- false);
- if (!array)
- goto err_fences_put;
-
- dma_resv_add_excl_fence(obj, &array->base);
- dma_fence_put(&array->base);
- }
-
- return 0;
-
-err_fences_put:
- while (count--)
- dma_fence_put(fences[count]);
- kfree(fences);
- return -ENOMEM;
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
/**
- * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
*
- * @dmabuf: DMA-buf where we attach to
- * @attach: attachment to add
+ * @attach: attachment to pin down
*
- * Add the attachment as user to the exported DMA-buf.
+ * Pin the BO which is backing the DMA-buf so that it can't move any more.
*/
-static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
+static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = dmabuf->priv;
+ struct drm_gem_object *obj = attach->dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
- if (attach->dev->driver == adev->dev->driver)
- return 0;
-
- r = amdgpu_bo_reserve(bo, false);
- if (unlikely(r != 0))
- return r;
-
- /*
- * We only create shared fences for internal use, but importers
- * of the dmabuf rely on exclusive fences for implicitly
- * tracking write hazards. As any of the current fences may
- * correspond to a write, we need to convert all existing
- * fences on the reservation object into a single exclusive
- * fence.
- */
- r = __dma_resv_make_exclusive(bo->tbo.base.resv);
+ /* pin buffer into GTT */
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
return r;
- bo->prime_shared_count++;
- amdgpu_bo_unreserve(bo);
+ if (bo->tbo.moving) {
+ r = dma_fence_wait(bo->tbo.moving, true);
+ if (r) {
+ amdgpu_bo_unpin(bo);
+ return r;
+ }
+ }
return 0;
}
/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
*
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
+ * @attach: attachment to unpin
*
- * Called when an attachment is removed from the DMA-buf.
+ * Unpin a previously pinned BO to make it movable again.
*/
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = dmabuf->priv;
+ struct drm_gem_object *obj = attach->dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
- bo->prime_shared_count--;
+ amdgpu_bo_unpin(bo);
}
/**
@@ -241,27 +156,60 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct sg_table *sgt;
long r;
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
- if (r)
- return ERR_PTR(r);
+ if (!bo->tbo.pin_count) {
+ /* move buffer into GTT or VRAM */
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
- sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
- if (IS_ERR(sgt))
- return sgt;
+ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
+ attach->peer2peer) {
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ domains |= AMDGPU_GEM_DOMAIN_VRAM;
+ }
+ amdgpu_bo_placement_from_domain(bo, domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return ERR_PTR(r);
+
+ } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
+ AMDGPU_GEM_DOMAIN_GTT)) {
+ return ERR_PTR(-EBUSY);
+ }
- if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC))
- goto error_free;
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_TT:
+ sgt = drm_prime_pages_to_sg(obj->dev,
+ bo->tbo.ttm->pages,
+ bo->tbo.ttm->num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
+
+ if (dma_map_sgtable(attach->dev, sgt, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
+ break;
+
+ case TTM_PL_VRAM:
+ r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+ bo->tbo.base.size, attach->dev,
+ dir, &sgt);
+ if (r)
+ return ERR_PTR(r);
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
return sgt;
error_free:
sg_free_table(sgt);
kfree(sgt);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(-EBUSY);
}
/**
@@ -277,13 +225,13 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-
- dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
- sg_free_table(sgt);
- kfree(sgt);
- amdgpu_bo_unpin(bo);
+ if (sgt->sgl->page_link) {
+ dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+ sg_free_table(sgt);
+ kfree(sgt);
+ } else {
+ amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+ }
}
/**
@@ -317,7 +265,8 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
if (unlikely(ret != 0))
return ret;
- if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ if (!bo->tbo.pin_count &&
+ (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
@@ -327,9 +276,10 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
- .dynamic_mapping = true,
.attach = amdgpu_dma_buf_attach,
.detach = amdgpu_dma_buf_detach,
+ .pin = amdgpu_dma_buf_pin,
+ .unpin = amdgpu_dma_buf_unpin,
.map_dma_buf = amdgpu_dma_buf_map,
.unmap_dma_buf = amdgpu_dma_buf_unmap,
.release = drm_gem_dmabuf_release,
@@ -360,10 +310,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(gobj, flags);
- if (!IS_ERR(buf)) {
- buf->file->f_mapping = gobj->dev->anon_inode->i_mapping;
+ if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
- }
return buf;
}
@@ -384,30 +332,32 @@ static struct drm_gem_object *
amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
struct dma_resv *resv = dma_buf->resv;
- struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
- struct amdgpu_bo_param bp;
+ uint64_t flags = 0;
int ret;
- memset(&bp, 0, sizeof(bp));
- bp.size = dma_buf->size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_CPU;
- bp.flags = 0;
- bp.type = ttm_bo_type_sg;
- bp.resv = resv;
dma_resv_lock(resv, NULL);
- ret = amdgpu_bo_create(adev, &bp, &bo);
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
+
+ flags |= other->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ }
+
+ ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_CPU, flags,
+ ttm_bo_type_sg, resv, &gobj);
if (ret)
goto error;
+ bo = gem_to_amdgpu_bo(gobj);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (dma_buf->ops != &amdgpu_dmabuf_ops)
- bo->prime_shared_count = 1;
dma_resv_unlock(resv);
- return &bo->tbo.base;
+ return gobj;
error:
dma_resv_unlock(resv);
@@ -415,6 +365,74 @@ error:
}
/**
+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that the we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct ttm_placement placement = {};
+ struct amdgpu_vm_bo_base *bo_base;
+ int r;
+
+ if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ return;
+
+ r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+ return;
+ }
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+
+ if (ticket) {
+ /* When we get an error here it means that somebody
+ * else is holding the VM lock and updating page tables
+ * So we can just continue here.
+ */
+ r = dma_resv_lock(resv, ticket);
+ if (r)
+ continue;
+
+ } else {
+ /* TODO: This is more problematic and we actually need
+ * to allow page tables updates without holding the
+ * lock.
+ */
+ if (!dma_resv_trylock(resv))
+ continue;
+ }
+
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (!r)
+ r = amdgpu_vm_handle_moved(adev, vm);
+
+ if (r && r != -EBUSY)
+ DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
+ r);
+
+ dma_resv_unlock(resv);
+ }
+}
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+ .allow_peer2peer = true,
+ .move_notify = amdgpu_dma_buf_move_notify
+};
+
+/**
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
* @dev: DRM device
* @dma_buf: Shared DMA buffer
@@ -446,7 +464,8 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
if (IS_ERR(obj))
return obj;
- attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true);
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+ &amdgpu_dma_buf_attach_ops, obj);
if (IS_ERR(attach)) {
drm_gem_object_put(obj);
return ERR_CAST(attach);
@@ -456,3 +475,36 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
obj->import_attach = attach;
return obj;
}
+
+/**
+ * amdgpu_dmabuf_is_xgmi_accessible - Check if xgmi available for P2P transfer
+ *
+ * @adev: amdgpu_device pointer of the importer
+ * @bo: amdgpu buffer object
+ *
+ * Returns:
+ * True if dmabuf accessible over xgmi, false otherwise.
+ */
+bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->tbo.base;
+ struct drm_gem_object *gobj;
+
+ if (obj->import_attach) {
+ struct dma_buf *dma_buf = obj->import_attach->dmabuf;
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* No XGMI with non AMD GPUs */
+ return false;
+
+ gobj = dma_buf->priv;
+ bo = gem_to_amdgpu_bo(gobj);
+ }
+
+ if (amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
+ (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
+ return true;
+
+ return false;
+}