From 1a8b612ef09bcba3708443339adfad9802d3e9d8 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 2 Aug 2023 15:21:49 -0700
Subject: drm/msm: Take lru lock once per job_run

Rather than acquiring it and dropping it for each individual obj.

Signed-off-by: Rob Clark
Patchwork: https://patchwork.freedesktop.org/patch/551019/
---
 drivers/gpu/drm/msm/msm_ringbuffer.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/gpu/drm/msm/msm_ringbuffer.c')

diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index b60199184409..8b8353dcde9f 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -16,10 +16,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 	struct msm_gem_submit *submit = to_msm_submit(job);
 	struct msm_fence_context *fctx = submit->ring->fctx;
 	struct msm_gpu *gpu = submit->gpu;
+	struct msm_drm_private *priv = gpu->dev->dev_private;
 	int i;
 
 	msm_fence_init(submit->hw_fence, fctx);
 
+	mutex_lock(&priv->lru.lock);
+
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = &submit->bos[i].obj->base;
 
@@ -28,6 +31,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 		submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
 	}
 
+	mutex_unlock(&priv->lru.lock);
+
 	/* TODO move submit path over to using a per-ring lock.. */
 	mutex_lock(&gpu->lock);
 
--
cgit v1.2.3-70-g09d2

From 6ba5daa5d5ad54b78aeac8912092f986e8d4c38f Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 2 Aug 2023 15:21:50 -0700
Subject: drm/msm: Use drm_gem_object in submit bos table

Basically everywhere wants the base ptr type.  So store that instead
of msm_gem_object.

Signed-off-by: Rob Clark
Patchwork: https://patchwork.freedesktop.org/patch/551021/
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c |  6 ++---
 drivers/gpu/drm/msm/msm_gem.h         |  2 +-
 drivers/gpu/drm/msm/msm_gem_submit.c  | 42 +++++++++++++++++------------------
 drivers/gpu/drm/msm/msm_gpu.c         | 20 ++++++++---------
 drivers/gpu/drm/msm/msm_rd.c          |  8 +++----
 drivers/gpu/drm/msm/msm_ringbuffer.c  |  2 +-
 6 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'drivers/gpu/drm/msm/msm_ringbuffer.c')

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 041d1cd8ed99..e5916c106796 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -66,7 +66,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
 	struct msm_ringbuffer *ring = submit->ring;
-	struct msm_gem_object *obj;
+	struct drm_gem_object *obj;
 	uint32_t *ptr, dwords;
 	unsigned int i;
 
@@ -83,7 +83,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
 			obj = submit->bos[submit->cmd[i].idx].obj;
 			dwords = submit->cmd[i].size;
 
-			ptr = msm_gem_get_vaddr(&obj->base);
+			ptr = msm_gem_get_vaddr(obj);
 
 			/* _get_vaddr() shouldn't fail at this point,
 			 * since we've already mapped it once in
@@ -103,7 +103,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
 				OUT_RING(ring, ptr[i]);
 			}
 
-			msm_gem_put_vaddr(&obj->base);
+			msm_gem_put_vaddr(obj);
 			break;
 		}
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 2bd6846c83a9..31b370474fa8 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -301,7 +301,7 @@ struct msm_gem_submit {
 #define BO_VMA_PINNED	0x1000	/* vma (virtual address) is pinned */
 		uint32_t flags;
 		union {
-			struct msm_gem_object *obj;
+			struct drm_gem_object *obj;
 			uint32_t handle;
 		};
 		uint64_t iova;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 3b908f9f5493..a03bdded1a15 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -165,7 +165,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 
 		drm_gem_object_get(obj);
 
-		submit->bos[i].obj = to_msm_bo(obj);
+		submit->bos[i].obj = obj;
 	}
 
 out_unlock:
@@ -251,7 +251,7 @@ out:
 static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
 		unsigned cleanup_flags)
 {
-	struct drm_gem_object *obj = &submit->bos[i].obj->base;
+	struct drm_gem_object *obj = submit->bos[i].obj;
 	unsigned flags = submit->bos[i].flags & cleanup_flags;
 
 	/*
@@ -287,7 +287,7 @@ static int submit_lock_objects(struct msm_gem_submit *submit)
 
 retry:
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		if (slow_locked == i)
 			slow_locked = -1;
@@ -295,7 +295,7 @@ retry:
 		contended = i;
 
 		if (!(submit->bos[i].flags & BO_LOCKED)) {
-			ret = dma_resv_lock_interruptible(msm_obj->base.resv,
+			ret = dma_resv_lock_interruptible(obj->resv,
 							  &submit->ticket);
 			if (ret)
 				goto fail;
@@ -321,9 +321,9 @@ fail:
 		submit_unlock_unpin_bo(submit, slow_locked);
 
 	if (ret == -EDEADLK) {
-		struct msm_gem_object *msm_obj = submit->bos[contended].obj;
+		struct drm_gem_object *obj = submit->bos[contended].obj;
 		/* we lost out in a seqno race, lock and retry.. */
-		ret = dma_resv_lock_slow_interruptible(msm_obj->base.resv,
+		ret = dma_resv_lock_slow_interruptible(obj->resv,
 						       &submit->ticket);
 		if (!ret) {
 			submit->bos[contended].flags |= BO_LOCKED;
@@ -346,7 +346,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
 	int i, ret = 0;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = &submit->bos[i].obj->base;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
 
 		/* NOTE: _reserve_shared() must happen before
@@ -389,7 +389,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 	submit->valid = true;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = &submit->bos[i].obj->base;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 		struct msm_gem_vma *vma;
 
 		/* if locking succeeded, pin bo: */
@@ -424,7 +424,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
 	int i;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = &submit->bos[i].obj->base;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
 			dma_resv_add_fence(obj->resv, submit->user_fence,
@@ -436,7 +436,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
 }
 
 static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
-		struct msm_gem_object **obj, uint64_t *iova, bool *valid)
+		struct drm_gem_object **obj, uint64_t *iova, bool *valid)
 {
 	if (idx >= submit->nr_bos) {
 		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
@@ -455,7 +455,7 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
 }
 
 /* process the reloc's and patch up the cmdstream as needed: */
-static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *obj,
+static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *obj,
 		uint32_t offset, uint32_t nr_relocs, struct drm_msm_gem_submit_reloc *relocs)
 {
 	uint32_t i, last_offset = 0;
@@ -473,7 +473,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	/* For now, just map the entire thing.  Eventually we probably
 	 * to do it page-by-page, w/ kmap() if not vmap()d..
 	 */
-	ptr = msm_gem_get_vaddr_locked(&obj->base);
+	ptr = msm_gem_get_vaddr_locked(obj);
 
 	if (IS_ERR(ptr)) {
 		ret = PTR_ERR(ptr);
@@ -497,7 +497,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 		/* offset in dwords: */
 		off = submit_reloc.submit_offset / 4;
 
-		if ((off >= (obj->base.size / 4)) ||
+		if ((off >= (obj->size / 4)) ||
 		    (off < last_offset)) {
 			DRM_ERROR("invalid offset %u at reloc %u\n", off, i);
 			ret = -EINVAL;
@@ -524,7 +524,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	}
 
 out:
-	msm_gem_put_vaddr_locked(&obj->base);
+	msm_gem_put_vaddr_locked(obj);
 
 	return ret;
 }
@@ -542,10 +542,10 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 		cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		submit_cleanup_bo(submit, i, cleanup_flags);
 		if (error)
-			drm_gem_object_put(&msm_obj->base);
+			drm_gem_object_put(obj);
 	}
 }
@@ -554,7 +554,7 @@ void msm_submit_retire(struct msm_gem_submit *submit)
 	int i;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = &submit->bos[i].obj->base;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		drm_gem_object_put(obj);
 	}
@@ -861,17 +861,17 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto out;
 
 	for (i = 0; i < args->nr_cmds; i++) {
-		struct msm_gem_object *msm_obj;
+		struct drm_gem_object *obj;
 		uint64_t iova;
 
 		ret = submit_bo(submit, submit->cmd[i].idx,
-				&msm_obj, &iova, NULL);
+				&obj, &iova, NULL);
 		if (ret)
 			goto out;
 
 		if (!submit->cmd[i].size ||
 		    ((submit->cmd[i].size + submit->cmd[i].offset) >
-				msm_obj->base.size / 4)) {
+				obj->size / 4)) {
 			DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
 			ret = -EINVAL;
 			goto out;
@@ -892,7 +892,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 			continue;
 		}
 
-		ret = submit_reloc(submit, msm_obj, submit->cmd[i].offset * 4,
+		ret = submit_reloc(submit, obj, submit->cmd[i].offset * 4,
 				submit->cmd[i].nr_relocs, submit->cmd[i].relocs);
 		if (ret)
 			goto out;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 8a9bacc920eb..5c10b559a595 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -219,36 +219,36 @@ static void msm_gpu_devcoredump_free(void *data)
 }
 
 static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
-		struct msm_gem_object *obj, u64 iova, bool full)
+		struct drm_gem_object *obj, u64 iova, bool full)
 {
 	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];
 
 	/* Don't record write only objects */
-	state_bo->size = obj->base.size;
+	state_bo->size = obj->size;
 	state_bo->iova = iova;
 
-	BUILD_BUG_ON(sizeof(state_bo->name) != sizeof(obj->name));
+	BUILD_BUG_ON(sizeof(state_bo->name) != sizeof(to_msm_bo(obj)->name));
 
-	memcpy(state_bo->name, obj->name, sizeof(state_bo->name));
+	memcpy(state_bo->name, to_msm_bo(obj)->name, sizeof(state_bo->name));
 
 	if (full) {
 		void *ptr;
 
-		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
+		state_bo->data = kvmalloc(obj->size, GFP_KERNEL);
 		if (!state_bo->data)
 			goto out;
 
-		msm_gem_lock(&obj->base);
-		ptr = msm_gem_get_vaddr_active(&obj->base);
-		msm_gem_unlock(&obj->base);
+		msm_gem_lock(obj);
+		ptr = msm_gem_get_vaddr_active(obj);
+		msm_gem_unlock(obj);
 		if (IS_ERR(ptr)) {
 			kvfree(state_bo->data);
 			state_bo->data = NULL;
 			goto out;
 		}
 
-		memcpy(state_bo->data, ptr, obj->base.size);
-		msm_gem_put_vaddr(&obj->base);
+		memcpy(state_bo->data, ptr, obj->size);
+		msm_gem_put_vaddr(obj);
 	}
 out:
 	state->nr_bos++;
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 8d5687d5ed78..5adc51f7ab59 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -310,7 +310,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
 		struct msm_gem_submit *submit, int idx,
 		uint64_t iova, uint32_t size, bool full)
 {
-	struct msm_gem_object *obj = submit->bos[idx].obj;
+	struct drm_gem_object *obj = submit->bos[idx].obj;
 	unsigned offset = 0;
 	const char *buf;
 
@@ -318,7 +318,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
 		offset = iova - submit->bos[idx].iova;
 	} else {
 		iova = submit->bos[idx].iova;
-		size = obj->base.size;
+		size = obj->size;
 	}
 
 	/*
@@ -335,7 +335,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
 		return;
 
-	buf = msm_gem_get_vaddr_active(&obj->base);
+	buf = msm_gem_get_vaddr_active(obj);
 	if (IS_ERR(buf))
 		return;
 
@@ -343,7 +343,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
 
 	rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
 
-	msm_gem_put_vaddr_locked(&obj->base);
+	msm_gem_put_vaddr_locked(obj);
 }
 
 /* called under gpu->lock */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 8b8353dcde9f..6fa427d2992e 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -24,7 +24,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 	mutex_lock(&priv->lru.lock);
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = &submit->bos[i].obj->base;
+		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		msm_gem_vma_unpin_fenced(submit->bos[i].vma, fctx);
 		msm_gem_unpin_active(obj);
--
cgit v1.2.3-70-g09d2

From 7391c282ba0f0e82ac131658e2faf712215ed6a2 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 2 Aug 2023 15:21:52 -0700
Subject: drm/msm: Remove vma use tracking

This was not strictly necessary, as page unpinning (i.e. the shrinker)
only cares about the resv.  It did give us some extra sanity checking
for userspace-controlled iova, and was useful to catch issues on the
kernel and userspace side when enabling userspace iova.  But if
userspace screws this up, it just corrupts its own gpu buffers and/or
gets iova faults.  So we can just let userspace shoot its own foot and
drop the extra per-buffer SUBMIT overhead.
Signed-off-by: Rob Clark
Acked-by: Daniel Vetter
Patchwork: https://patchwork.freedesktop.org/patch/551023/
---
 drivers/gpu/drm/msm/msm_gem.c        |  9 ++---
 drivers/gpu/drm/msm/msm_gem.h        | 12 +------
 drivers/gpu/drm/msm/msm_gem_submit.c | 14 +++-----
 drivers/gpu/drm/msm/msm_gem_vma.c    | 67 +-----------------------------------
 drivers/gpu/drm/msm/msm_ringbuffer.c |  3 +-
 5 files changed, 9 insertions(+), 96 deletions(-)

(limited to 'drivers/gpu/drm/msm/msm_ringbuffer.c')

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 1c81ff6115ac..ce1ed0f9ad2d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -607,9 +607,6 @@ static int clear_iova(struct drm_gem_object *obj,
 	if (!vma)
 		return 0;
 
-	if (msm_gem_vma_inuse(vma))
-		return -EBUSY;
-
 	msm_gem_vma_purge(vma);
 	msm_gem_vma_close(vma);
 	del_vma(vma);
@@ -660,7 +657,6 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj,
 	msm_gem_lock(obj);
 	vma = lookup_vma(obj, aspace);
 	if (!GEM_WARN_ON(!vma)) {
-		msm_gem_vma_unpin(vma);
 		msm_gem_unpin_locked(obj);
 	}
 	msm_gem_unlock(obj);
@@ -991,11 +987,10 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
 		} else {
 			name = comm = NULL;
 		}
-		seq_printf(m, " [%s%s%s: aspace=%p, %08llx,%s,inuse=%d]",
+		seq_printf(m, " [%s%s%s: aspace=%p, %08llx,%s]",
 			name, comm ? ":" : "", comm ? comm : "",
 			vma->aspace, vma->iova,
-			vma->mapped ? "mapped" : "unmapped",
-			msm_gem_vma_inuse(vma));
+			vma->mapped ? "mapped" : "unmapped");
 		kfree(comm);
 	}
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 2ddd896aac68..8ddef5443140 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -59,24 +59,16 @@ struct msm_fence_context;
 
 struct msm_gem_vma {
 	struct drm_mm_node node;
-	spinlock_t lock;
 	uint64_t iova;
 	struct msm_gem_address_space *aspace;
 	struct list_head list;	/* node in msm_gem_object::vmas */
 	bool mapped;
-	int inuse;
-	uint32_t fence_mask;
-	uint32_t fence[MSM_GPU_MAX_RINGS];
-	struct msm_fence_context *fctx[MSM_GPU_MAX_RINGS];
 };
 
 struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace);
 int msm_gem_vma_init(struct msm_gem_vma *vma, int size,
 		u64 range_start, u64 range_end);
-bool msm_gem_vma_inuse(struct msm_gem_vma *vma);
 void msm_gem_vma_purge(struct msm_gem_vma *vma);
-void msm_gem_vma_unpin(struct msm_gem_vma *vma);
-void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx);
 int msm_gem_vma_map(struct msm_gem_vma *vma, int prot, struct sg_table *sgt, int size);
 void msm_gem_vma_close(struct msm_gem_vma *vma);
 
@@ -298,15 +290,13 @@ struct msm_gem_submit {
 /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
 #define BO_VALID	0x8000	/* is current addr in cmdstream correct/valid? */
 #define BO_LOCKED	0x4000	/* obj lock is held */
-#define BO_OBJ_PINNED	0x2000	/* obj (pages) is pinned and on active list */
-#define BO_VMA_PINNED	0x1000	/* vma (virtual address) is pinned */
+#define BO_PINNED	0x2000	/* obj (pages) is pinned and on active list */
 		uint32_t flags;
 		union {
 			struct drm_gem_object *obj;
 			uint32_t handle;
 		};
 		uint64_t iova;
-		struct msm_gem_vma *vma;
 	} bos[];
 };
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index ec5aa6932ea1..99744de6c05a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -261,10 +261,7 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
 	 */
 	submit->bos[i].flags &= ~cleanup_flags;
 
-	if (flags & BO_VMA_PINNED)
-		msm_gem_vma_unpin(submit->bos[i].vma);
-
-	if (flags & BO_OBJ_PINNED)
+	if (flags & BO_PINNED)
 		msm_gem_unpin_locked(obj);
 
 	if (flags & BO_LOCKED)
@@ -273,7 +270,7 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
 
 static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
 {
-	unsigned cleanup_flags = BO_VMA_PINNED | BO_OBJ_PINNED | BO_LOCKED;
+	unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
 	submit_cleanup_bo(submit, i, cleanup_flags);
 
 	if (!(submit->bos[i].flags & BO_VALID))
@@ -404,9 +401,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 		if (ret)
 			break;
 
-		submit->bos[i].flags |= BO_VMA_PINNED;
-		submit->bos[i].vma = vma;
-
 		if (vma->iova == submit->bos[i].iova) {
 			submit->bos[i].flags |= BO_VALID;
 		} else {
@@ -427,7 +421,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 	mutex_lock(&priv->lru.lock);
 	for (i = 0; i < submit->nr_bos; i++) {
 		msm_gem_pin_obj_locked(submit->bos[i].obj);
-		submit->bos[i].flags |= BO_OBJ_PINNED;
+		submit->bos[i].flags |= BO_PINNED;
 	}
 	mutex_unlock(&priv->lru.lock);
 
@@ -554,7 +548,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 	unsigned i;
 
 	if (error)
-		cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED;
+		cleanup_flags |= BO_PINNED;
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = submit->bos[i].obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 98287ed99960..11e842dda73c 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -38,41 +38,12 @@ msm_gem_address_space_get(struct msm_gem_address_space *aspace)
 	return aspace;
 }
 
-bool msm_gem_vma_inuse(struct msm_gem_vma *vma)
-{
-	bool ret = true;
-
-	spin_lock(&vma->lock);
-
-	if (vma->inuse > 0)
-		goto out;
-
-	while (vma->fence_mask) {
-		unsigned idx = ffs(vma->fence_mask) - 1;
-
-		if (!msm_fence_completed(vma->fctx[idx], vma->fence[idx]))
-			goto out;
-
-		vma->fence_mask &= ~BIT(idx);
-	}
-
-	ret = false;
-
-out:
-	spin_unlock(&vma->lock);
-
-	return ret;
-}
-
 /* Actually unmap memory for the vma */
 void msm_gem_vma_purge(struct msm_gem_vma *vma)
 {
 	struct msm_gem_address_space *aspace = vma->aspace;
 	unsigned size = vma->node.size;
 
-	/* Print a message if we try to purge a vma in use */
-	GEM_WARN_ON(msm_gem_vma_inuse(vma));
-
 	/* Don't do anything if the memory isn't mapped */
 	if (!vma->mapped)
 		return;
@@ -82,33 +53,6 @@ void msm_gem_vma_purge(struct msm_gem_vma *vma)
 	vma->mapped = false;
 }
 
-static void vma_unpin_locked(struct msm_gem_vma *vma)
-{
-	if (GEM_WARN_ON(!vma->inuse))
-		return;
-	if (!GEM_WARN_ON(!vma->iova))
-		vma->inuse--;
-}
-
-/* Remove reference counts for the mapping */
-void msm_gem_vma_unpin(struct msm_gem_vma *vma)
-{
-	spin_lock(&vma->lock);
-	vma_unpin_locked(vma);
-	spin_unlock(&vma->lock);
-}
-
-/* Replace pin reference with fence: */
-void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx)
-{
-	spin_lock(&vma->lock);
-	vma->fctx[fctx->index] = fctx;
-	vma->fence[fctx->index] = fctx->last_fence;
-	vma->fence_mask |= BIT(fctx->index);
-	vma_unpin_locked(vma);
-	spin_unlock(&vma->lock);
-}
-
 /* Map and pin vma: */
 int
 msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
@@ -120,11 +64,6 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
 	if (GEM_WARN_ON(!vma->iova))
 		return -EINVAL;
 
-	/* Increase the usage counter */
-	spin_lock(&vma->lock);
-	vma->inuse++;
-	spin_unlock(&vma->lock);
-
 	if (vma->mapped)
 		return 0;
 
@@ -146,9 +85,6 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
 
 	if (ret) {
 		vma->mapped = false;
-		spin_lock(&vma->lock);
-		vma->inuse--;
-		spin_unlock(&vma->lock);
 	}
 
 	return ret;
@@ -159,7 +95,7 @@ void msm_gem_vma_close(struct msm_gem_vma *vma)
 {
 	struct msm_gem_address_space *aspace = vma->aspace;
 
-	GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped);
+	GEM_WARN_ON(vma->mapped);
 
 	spin_lock(&aspace->lock);
 	if (vma->iova)
@@ -179,7 +115,6 @@ struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace)
 	if (!vma)
 		return NULL;
 
-	spin_lock_init(&vma->lock);
 	vma->aspace = aspace;
 
 	return vma;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 6fa427d2992e..7f5e0a961bba 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -26,9 +26,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = submit->bos[i].obj;
 
-		msm_gem_vma_unpin_fenced(submit->bos[i].vma, fctx);
 		msm_gem_unpin_active(obj);
-		submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
+		submit->bos[i].flags &= ~BO_PINNED;
 	}
 
 	mutex_unlock(&priv->lru.lock);
--
cgit v1.2.3-70-g09d2

From abe2023b4cea192ab266b351fd38dc9dbd846df0 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Thu, 10 Aug 2023 14:31:41 -0700
Subject: drm/msm/gpu: Push gpu lock down past runpm

Avoid holding gpu lock when calling runpm, to avoid this lockdep splat:

   ======================================================
   WARNING: possible circular locking dependency detected
   6.4.3-debug+ #14 Not tainted
   ------------------------------------------------------
   ring0/373 is trying to acquire lock:
   ffffffead86efb98 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x70/0x98

   but task is already holding lock:
   ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm]

   which lock already depends on the new lock.
   the existing dependency chain (in reverse order) is:

   -> #4 (&gpu->lock){+.+.}-{3:3}:
          __mutex_lock+0xc8/0x388
          mutex_lock_nested+0x2c/0x38
          msm_job_run+0x7c/0x128 [msm]
          drm_sched_main+0x264/0x354 [gpu_sched]
          kthread+0xf0/0x100
          ret_from_fork+0x10/0x20

   -> #3 (dma_fence_map){++++}-{0:0}:
          __dma_fence_might_wait+0x74/0xc0
          dma_resv_lockdep+0x1f0/0x2e8
          do_one_initcall+0xb4/0x214
          kernel_init_freeable+0x338/0x33c
          kernel_init+0x30/0x134
          ret_from_fork+0x10/0x20

   -> #2 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
          fs_reclaim_acquire+0x7c/0x9c
          slab_pre_alloc_hook.constprop.0+0x40/0x250
          __kmem_cache_alloc_node+0x60/0x18c
          kmalloc_node_trace+0x40/0x84
          alloc_worker+0x2c/0x64
          init_rescuer+0x34/0xe0
          workqueue_init+0x168/0x1fc
          kernel_init_freeable+0x15c/0x33c
          kernel_init+0x30/0x134
          ret_from_fork+0x10/0x20

   -> #1 (fs_reclaim){+.+.}-{0:0}:
          __fs_reclaim_acquire+0x3c/0x48
          fs_reclaim_acquire+0x50/0x9c
          slab_pre_alloc_hook.constprop.0+0x40/0x250
          __kmem_cache_alloc_node+0x60/0x18c
          kmalloc_trace+0x44/0x88
          clk_rcg2_dfs_determine_rate+0x60/0x214
          clk_core_determine_round_nolock+0xb8/0xf0
          clk_core_round_rate_nolock+0x84/0x118
          clk_core_round_rate_nolock+0xd8/0x118
          clk_round_rate+0x6c/0xd0
          geni_se_clk_tbl_get+0x78/0xc0
          geni_se_clk_freq_match+0x44/0xe4
          get_spi_clk_cfg+0x50/0xf4
          geni_spi_set_clock_and_bw+0x54/0x104
          spi_geni_prepare_message+0x130/0x174
          __spi_pump_transfer_message+0x200/0x4d8
          __spi_sync+0x13c/0x23c
          spi_sync_locked+0x18/0x24
          do_cros_ec_pkt_xfer_spi+0x124/0x3f0
          cros_ec_xfer_high_pri_work+0x28/0x3c
          kthread_worker_fn+0x14c/0x27c
          kthread+0xf0/0x100
          ret_from_fork+0x10/0x20

   -> #0 (prepare_lock){+.+.}-{3:3}:
          __lock_acquire+0xdf8/0x109c
          lock_acquire+0x234/0x284
          __mutex_lock+0xc8/0x388
          mutex_lock_nested+0x2c/0x38
          clk_prepare_lock+0x70/0x98
          clk_prepare+0x24/0x50
          clk_bulk_prepare+0x50/0x9c
          a6xx_gmu_resume+0x94/0x800 [msm]
          a6xx_gmu_pm_resume+0x38/0x158 [msm]
          adreno_runtime_resume+0x2c/0x38 [msm]
          pm_generic_runtime_resume+0x30/0x44
          __rpm_callback+0x4c/0x134
          rpm_callback+0x78/0x7c
          rpm_resume+0x3a4/0x46c
          __pm_runtime_resume+0x78/0xbc
          pm_runtime_get_sync.isra.0+0x14/0x20 [msm]
          msm_gpu_submit+0x4c/0x12c [msm]
          msm_job_run+0x88/0x128 [msm]
          drm_sched_main+0x264/0x354 [gpu_sched]
          kthread+0xf0/0x100
          ret_from_fork+0x10/0x20

   other info that might help us debug this:

   Chain exists of:
     prepare_lock --> dma_fence_map --> &gpu->lock

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&gpu->lock);
                                 lock(dma_fence_map);
                                 lock(&gpu->lock);
    lock(prepare_lock);

    *** DEADLOCK ***

   2 locks held by ring0/373:
    #0: ffffffead875ae50 (dma_fence_map){++++}-{0:0}, at: drm_sched_main+0x54/0x354 [gpu_sched]
    #1: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm]

   stack backtrace:
   CPU: 2 PID: 373 Comm: ring0 Not tainted 6.4.3-debug+ #14
   Hardware name: Google Villager (rev1+) with LTE (DT)
   Call trace:
    dump_backtrace+0xb4/0xf0
    show_stack+0x20/0x30
    dump_stack_lvl+0x60/0x84
    dump_stack+0x18/0x24
    print_circular_bug+0x1cc/0x234
    check_noncircular+0x78/0xac
    __lock_acquire+0xdf8/0x109c
    lock_acquire+0x234/0x284
    __mutex_lock+0xc8/0x388
    mutex_lock_nested+0x2c/0x38
    clk_prepare_lock+0x70/0x98
    clk_prepare+0x24/0x50
    clk_bulk_prepare+0x50/0x9c
    a6xx_gmu_resume+0x94/0x800 [msm]
    a6xx_gmu_pm_resume+0x38/0x158 [msm]
    adreno_runtime_resume+0x2c/0x38 [msm]
    pm_generic_runtime_resume+0x30/0x44
    __rpm_callback+0x4c/0x134
    rpm_callback+0x78/0x7c
    rpm_resume+0x3a4/0x46c
    __pm_runtime_resume+0x78/0xbc
    pm_runtime_get_sync.isra.0+0x14/0x20 [msm]
    msm_gpu_submit+0x4c/0x12c [msm]
    msm_job_run+0x88/0x128 [msm]
    drm_sched_main+0x264/0x354 [gpu_sched]
    kthread+0xf0/0x100
    ret_from_fork+0x10/0x20

Signed-off-by: Rob Clark
Patchwork: https://patchwork.freedesktop.org/patch/552298/
---
 drivers/gpu/drm/msm/msm_gpu.c        | 11 ++++++-----
 drivers/gpu/drm/msm/msm_ringbuffer.c |  7 ++-----
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/msm/msm_ringbuffer.c')

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 5c10b559a595..7f64c6667300 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -749,13 +749,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	struct msm_ringbuffer *ring = submit->ring;
 	unsigned long flags;
 
-	WARN_ON(!mutex_is_locked(&gpu->lock));
-
 	pm_runtime_get_sync(&gpu->pdev->dev);
 
-	msm_gpu_hw_init(gpu);
+	mutex_lock(&gpu->lock);
 
-	submit->seqno = submit->hw_fence->seqno;
+	msm_gpu_hw_init(gpu);
 
 	update_sw_cntrs(gpu);
 
@@ -781,8 +779,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	gpu->funcs->submit(gpu, submit);
 	gpu->cur_ctx_seqno = submit->queue->ctx->seqno;
 
-	pm_runtime_put(&gpu->pdev->dev);
 	hangcheck_timer_reset(gpu);
+
+	mutex_unlock(&gpu->lock);
+
+	pm_runtime_put(&gpu->pdev->dev);
 }
 
 /*
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 7f5e0a961bba..40c0bc35a44c 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -21,6 +21,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 
 	msm_fence_init(submit->hw_fence, fctx);
 
+	submit->seqno = submit->hw_fence->seqno;
+
 	mutex_lock(&priv->lru.lock);
 
 	for (i = 0; i < submit->nr_bos; i++) {
@@ -32,13 +34,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 
 	mutex_unlock(&priv->lru.lock);
 
-	/* TODO move submit path over to using a per-ring lock.. */
-	mutex_lock(&gpu->lock);
-
 	msm_gpu_submit(gpu, submit);
 
-	mutex_unlock(&gpu->lock);
-
 	return dma_fence_get(submit->hw_fence);
 }
 
--
cgit v1.2.3-70-g09d2
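
For reference, the msm_job_run() that results from applying all four
patches looks roughly like this.  This is a sketch condensed from the
hunks above (error handling and some declarations abbreviated), not a
verbatim copy of the resulting file:

	static struct dma_fence *msm_job_run(struct drm_sched_job *job)
	{
		struct msm_gem_submit *submit = to_msm_submit(job);
		struct msm_fence_context *fctx = submit->ring->fctx;
		struct msm_gpu *gpu = submit->gpu;
		struct msm_drm_private *priv = gpu->dev->dev_private;
		int i;

		msm_fence_init(submit->hw_fence, fctx);

		/* seqno assignment moved here from msm_gpu_submit(),
		 * which no longer runs under a gpu->lock taken by the
		 * caller:
		 */
		submit->seqno = submit->hw_fence->seqno;

		/* one lru.lock cycle for the whole bos table (patch 1),
		 * with no per-vma unpin bookkeeping left (patch 3):
		 */
		mutex_lock(&priv->lru.lock);
		for (i = 0; i < submit->nr_bos; i++) {
			msm_gem_unpin_active(submit->bos[i].obj);
			submit->bos[i].flags &= ~BO_PINNED;
		}
		mutex_unlock(&priv->lru.lock);

		/* gpu->lock is taken inside msm_gpu_submit(), after the
		 * runpm resume, so clk prepare_lock is never acquired
		 * while gpu->lock is held (patch 4):
		 */
		msm_gpu_submit(gpu, submit);

		return dma_fence_get(submit->hw_fence);
	}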