From 1a8b612ef09bcba3708443339adfad9802d3e9d8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 2 Aug 2023 15:21:49 -0700 Subject: drm/msm: Take lru lock once per job_run Rather than acquiring it and dropping it for each individual obj. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/551019/ --- drivers/gpu/drm/msm/msm_gem.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/msm/msm_gem.c') diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 20cfd86d2b32..6d1dbffc3905 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -509,14 +509,11 @@ void msm_gem_unpin_locked(struct drm_gem_object *obj) */ void msm_gem_unpin_active(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); - mutex_lock(&priv->lru.lock); msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); update_lru_active(obj); - mutex_unlock(&priv->lru.lock); } struct msm_gem_vma *msm_gem_get_vma_locked(struct drm_gem_object *obj, -- cgit v1.2.3-70-g09d2 From fc896cf3d6913fb0e79ec146fff6dcda5aaa4384 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 2 Aug 2023 15:21:51 -0700 Subject: drm/msm: Take lru lock once per submit_pin_objects() Split out pin_count incrementing and lru updating into a separate loop so we can take the lru lock only once for all objs. Since we are still holding the obj lock, it is safe to split this up. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/551025/ --- drivers/gpu/drm/msm/msm_gem.c | 45 +++++++++++++++++++++++------------- drivers/gpu/drm/msm/msm_gem.h | 1 + drivers/gpu/drm/msm/msm_gem_submit.c | 17 +++++++++++++- 3 files changed, 46 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/msm/msm_gem.c') diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 6d1dbffc3905..1c81ff6115ac 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -222,9 +222,7 @@ static void put_pages(struct drm_gem_object *obj) static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj, unsigned madv) { - struct msm_drm_private *priv = obj->dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); - struct page **p; msm_gem_assert_locked(obj); @@ -234,16 +232,29 @@ static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj, return ERR_PTR(-EBUSY); } - p = get_pages(obj); - if (IS_ERR(p)) - return p; + return get_pages(obj); +} + +/* + * Update the pin count of the object, call under lru.lock + */ +void msm_gem_pin_obj_locked(struct drm_gem_object *obj) +{ + struct msm_drm_private *priv = obj->dev->dev_private; + + msm_gem_assert_locked(obj); + + to_msm_bo(obj)->pin_count++; + drm_gem_lru_move_tail_locked(&priv->lru.pinned, obj); +} + +static void pin_obj_locked(struct drm_gem_object *obj) +{ + struct msm_drm_private *priv = obj->dev->dev_private; mutex_lock(&priv->lru.lock); - msm_obj->pin_count++; - update_lru_locked(obj); + msm_gem_pin_obj_locked(obj); mutex_unlock(&priv->lru.lock); - - return p; } struct page **msm_gem_pin_pages(struct drm_gem_object *obj) @@ -252,6 +263,8 @@ struct page **msm_gem_pin_pages(struct drm_gem_object *obj) msm_gem_lock(obj); p = msm_gem_pin_pages_locked(obj, MSM_MADV_WILLNEED); + if (!IS_ERR(p)) + pin_obj_locked(obj); msm_gem_unlock(obj); return p; @@ -463,7 +476,7 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct page **pages; - int ret, prot = IOMMU_READ; + int prot = IOMMU_READ; if (!(msm_obj->flags & MSM_BO_GPU_READONLY)) prot |= IOMMU_WRITE; @@ -480,11 +493,7 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) if (IS_ERR(pages)) return PTR_ERR(pages); - ret = msm_gem_vma_map(vma, prot, msm_obj->sgt, obj->size); - if (ret) - msm_gem_unpin_locked(obj); - - return ret; + return msm_gem_vma_map(vma, prot, msm_obj->sgt, obj->size); } void msm_gem_unpin_locked(struct drm_gem_object *obj) @@ -536,8 +545,10 @@ static int get_and_pin_iova_range_locked(struct drm_gem_object *obj, return PTR_ERR(vma); ret = msm_gem_pin_vma_locked(obj, vma); - if (!ret) + if (!ret) { *iova = vma->iova; + pin_obj_locked(obj); + } return ret; } @@ -700,6 +711,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) if (IS_ERR(pages)) return ERR_CAST(pages); + pin_obj_locked(obj); + /* increment vmap_count *before* vmap() call, so shrinker can * check vmap_count (is_vunmapable()) outside of msm_obj lock. * This guarantees that we won't try to msm_gem_vunmap() this diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 31b370474fa8..2ddd896aac68 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -142,6 +142,7 @@ int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova); void msm_gem_unpin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace); +void msm_gem_pin_obj_locked(struct drm_gem_object *obj); struct page **msm_gem_pin_pages(struct drm_gem_object *obj); void msm_gem_unpin_pages(struct drm_gem_object *obj); int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index a03bdded1a15..ec5aa6932ea1 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -384,6 +384,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) static int submit_pin_objects(struct msm_gem_submit *submit) { + struct msm_drm_private *priv = submit->dev->dev_private; int i, ret = 0; submit->valid = true; @@ -403,7 +404,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) if (ret) break; - submit->bos[i].flags |= BO_OBJ_PINNED | BO_VMA_PINNED; + submit->bos[i].flags |= BO_VMA_PINNED; submit->bos[i].vma = vma; if (vma->iova == submit->bos[i].iova) { @@ -416,6 +417,20 @@ static int submit_pin_objects(struct msm_gem_submit *submit) } } + /* + * A second loop while holding the LRU lock (a) avoids acquiring/dropping + * the LRU lock for each individual bo, while (b) avoiding holding the + * LRU lock while calling msm_gem_pin_vma_locked() (which could trigger + * get_pages() which could trigger reclaim.. and if we held the LRU lock + * could trigger deadlock with the shrinker). + */ + mutex_lock(&priv->lru.lock); + for (i = 0; i < submit->nr_bos; i++) { + msm_gem_pin_obj_locked(submit->bos[i].obj); + submit->bos[i].flags |= BO_OBJ_PINNED; + } + mutex_unlock(&priv->lru.lock); + return ret; } -- cgit v1.2.3-70-g09d2 From 7391c282ba0f0e82ac131658e2faf712215ed6a2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 2 Aug 2023 15:21:52 -0700 Subject: drm/msm: Remove vma use tracking This was not strictly necessary, as page unpinning (ie. shrinker) only cares about the resv. It did give us some extra sanity checking for userspace controlled iova, and was useful to catch issues on kernel and userspace side when enabling userspace iova. But if userspace screws this up, it just corrupts it's own gpu buffers and/or gets iova faults. So we can just let userspace shoot it's own foot and drop the extra per- buffer SUBMIT overhead. Signed-off-by: Rob Clark Acked-by: Daniel Vetter Patchwork: https://patchwork.freedesktop.org/patch/551023/ --- drivers/gpu/drm/msm/msm_gem.c | 9 ++--- drivers/gpu/drm/msm/msm_gem.h | 12 +------ drivers/gpu/drm/msm/msm_gem_submit.c | 14 +++----- drivers/gpu/drm/msm/msm_gem_vma.c | 67 +----------------------------------- drivers/gpu/drm/msm/msm_ringbuffer.c | 3 +- 5 files changed, 9 insertions(+), 96 deletions(-) (limited to 'drivers/gpu/drm/msm/msm_gem.c') diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 1c81ff6115ac..ce1ed0f9ad2d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -607,9 +607,6 @@ static int clear_iova(struct drm_gem_object *obj, if (!vma) return 0; - if (msm_gem_vma_inuse(vma)) - return -EBUSY; - msm_gem_vma_purge(vma); msm_gem_vma_close(vma); del_vma(vma); @@ -660,7 +657,6 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj, msm_gem_lock(obj); vma = lookup_vma(obj, aspace); if (!GEM_WARN_ON(!vma)) { - msm_gem_vma_unpin(vma); msm_gem_unpin_locked(obj); } msm_gem_unlock(obj); @@ -991,11 +987,10 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, } else { name = comm = NULL; } - seq_printf(m, " [%s%s%s: aspace=%p, %08llx,%s,inuse=%d]", + seq_printf(m, " [%s%s%s: aspace=%p, %08llx,%s]", name, comm ? ":" : "", comm ? comm : "", vma->aspace, vma->iova, - vma->mapped ? "mapped" : "unmapped", - msm_gem_vma_inuse(vma)); + vma->mapped ? "mapped" : "unmapped"); kfree(comm); } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 2ddd896aac68..8ddef5443140 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -59,24 +59,16 @@ struct msm_fence_context; struct msm_gem_vma { struct drm_mm_node node; - spinlock_t lock; uint64_t iova; struct msm_gem_address_space *aspace; struct list_head list; /* node in msm_gem_object::vmas */ bool mapped; - int inuse; - uint32_t fence_mask; - uint32_t fence[MSM_GPU_MAX_RINGS]; - struct msm_fence_context *fctx[MSM_GPU_MAX_RINGS]; }; struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace); int msm_gem_vma_init(struct msm_gem_vma *vma, int size, u64 range_start, u64 range_end); -bool msm_gem_vma_inuse(struct msm_gem_vma *vma); void msm_gem_vma_purge(struct msm_gem_vma *vma); -void msm_gem_vma_unpin(struct msm_gem_vma *vma); -void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx); int msm_gem_vma_map(struct msm_gem_vma *vma, int prot, struct sg_table *sgt, int size); void msm_gem_vma_close(struct msm_gem_vma *vma); @@ -298,15 +290,13 @@ struct msm_gem_submit { /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ #define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ #define BO_LOCKED 0x4000 /* obj lock is held */ -#define BO_OBJ_PINNED 0x2000 /* obj (pages) is pinned and on active list */ -#define BO_VMA_PINNED 0x1000 /* vma (virtual address) is pinned */ +#define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */ uint32_t flags; union { struct drm_gem_object *obj; uint32_t handle; }; uint64_t iova; - struct msm_gem_vma *vma; } bos[]; }; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index ec5aa6932ea1..99744de6c05a 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -261,10 +261,7 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, */ submit->bos[i].flags &= ~cleanup_flags; - if (flags & BO_VMA_PINNED) - msm_gem_vma_unpin(submit->bos[i].vma); - - if (flags & BO_OBJ_PINNED) + if (flags & BO_PINNED) msm_gem_unpin_locked(obj); if (flags & BO_LOCKED) @@ -273,7 +270,7 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) { - unsigned cleanup_flags = BO_VMA_PINNED | BO_OBJ_PINNED | BO_LOCKED; + unsigned cleanup_flags = BO_PINNED | BO_LOCKED; submit_cleanup_bo(submit, i, cleanup_flags); if (!(submit->bos[i].flags & BO_VALID)) @@ -404,9 +401,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit) if (ret) break; - submit->bos[i].flags |= BO_VMA_PINNED; - submit->bos[i].vma = vma; - if (vma->iova == submit->bos[i].iova) { submit->bos[i].flags |= BO_VALID; } else { @@ -427,7 +421,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { msm_gem_pin_obj_locked(submit->bos[i].obj); - submit->bos[i].flags |= BO_OBJ_PINNED; + submit->bos[i].flags |= BO_PINNED; } mutex_unlock(&priv->lru.lock); @@ -554,7 +548,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error) unsigned i; if (error) - cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED; + cleanup_flags |= BO_PINNED; for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 98287ed99960..11e842dda73c 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -38,41 +38,12 @@ msm_gem_address_space_get(struct msm_gem_address_space *aspace) return aspace; } -bool msm_gem_vma_inuse(struct msm_gem_vma *vma) -{ - bool ret = true; - - spin_lock(&vma->lock); - - if (vma->inuse > 0) - goto out; - - while (vma->fence_mask) { - unsigned idx = ffs(vma->fence_mask) - 1; - - if (!msm_fence_completed(vma->fctx[idx], vma->fence[idx])) - goto out; - - vma->fence_mask &= ~BIT(idx); - } - - ret = false; - -out: - spin_unlock(&vma->lock); - - return ret; -} - /* Actually unmap memory for the vma */ void msm_gem_vma_purge(struct msm_gem_vma *vma) { struct msm_gem_address_space *aspace = vma->aspace; unsigned size = vma->node.size; - /* Print a message if we try to purge a vma in use */ - GEM_WARN_ON(msm_gem_vma_inuse(vma)); - /* Don't do anything if the memory isn't mapped */ if (!vma->mapped) return; @@ -82,33 +53,6 @@ void msm_gem_vma_purge(struct msm_gem_vma *vma) vma->mapped = false; } -static void vma_unpin_locked(struct msm_gem_vma *vma) -{ - if (GEM_WARN_ON(!vma->inuse)) - return; - if (!GEM_WARN_ON(!vma->iova)) - vma->inuse--; -} - -/* Remove reference counts for the mapping */ -void msm_gem_vma_unpin(struct msm_gem_vma *vma) -{ - spin_lock(&vma->lock); - vma_unpin_locked(vma); - spin_unlock(&vma->lock); -} - -/* Replace pin reference with fence: */ -void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx) -{ - spin_lock(&vma->lock); - vma->fctx[fctx->index] = fctx; - vma->fence[fctx->index] = fctx->last_fence; - vma->fence_mask |= BIT(fctx->index); - vma_unpin_locked(vma); - spin_unlock(&vma->lock); -} - /* Map and pin vma: */ int msm_gem_vma_map(struct msm_gem_vma *vma, int prot, @@ -120,11 +64,6 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot, if (GEM_WARN_ON(!vma->iova)) return -EINVAL; - /* Increase the usage counter */ - spin_lock(&vma->lock); - vma->inuse++; - spin_unlock(&vma->lock); - if (vma->mapped) return 0; @@ -146,9 +85,6 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot, if (ret) { vma->mapped = false; - spin_lock(&vma->lock); - vma->inuse--; - spin_unlock(&vma->lock); } return ret; @@ -159,7 +95,7 @@ void msm_gem_vma_close(struct msm_gem_vma *vma) { struct msm_gem_address_space *aspace = vma->aspace; - GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped); + GEM_WARN_ON(vma->mapped); spin_lock(&aspace->lock); if (vma->iova) @@ -179,7 +115,6 @@ struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace) if (!vma) return NULL; - spin_lock_init(&vma->lock); vma->aspace = aspace; return vma; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 6fa427d2992e..7f5e0a961bba 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -26,9 +26,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; - msm_gem_vma_unpin_fenced(submit->bos[i].vma, fctx); msm_gem_unpin_active(obj); - submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED); + submit->bos[i].flags &= ~BO_PINNED; } mutex_unlock(&priv->lru.lock); -- cgit v1.2.3-70-g09d2