diff options
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r-- | drivers/gpu/drm/vc4/Makefile | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_bo.c | 287 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_dpi.c | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_dsi.c | 144 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 156 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_hdmi.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_plane.c | 40 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_trace.h | 2 |
10 files changed, 608 insertions, 95 deletions
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 837c82757339..f5500df51686 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -25,5 +25,3 @@ vc4-y := \ vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o obj-$(CONFIG_DRM_VC4) += vc4.o - -CFLAGS_vc4_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 3afdbf4bc10b..98a6cb9f44fc 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -53,6 +53,17 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4) vc4->bo_labels[i].size_allocated / 1024, vc4->bo_labels[i].num_allocated); } + + mutex_lock(&vc4->purgeable.lock); + if (vc4->purgeable.num) + DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache", + vc4->purgeable.size / 1024, vc4->purgeable.num); + + if (vc4->purgeable.purged_num) + DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO", + vc4->purgeable.purged_size / 1024, + vc4->purgeable.purged_num); + mutex_unlock(&vc4->purgeable.lock); } #ifdef CONFIG_DEBUG_FS @@ -75,6 +86,17 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) } mutex_unlock(&vc4->bo_lock); + mutex_lock(&vc4->purgeable.lock); + if (vc4->purgeable.num) + seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache", + vc4->purgeable.size / 1024, vc4->purgeable.num); + + if (vc4->purgeable.purged_num) + seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO", + vc4->purgeable.purged_size / 1024, + vc4->purgeable.purged_num); + mutex_unlock(&vc4->purgeable.lock); + return 0; } #endif @@ -247,6 +269,109 @@ static void vc4_bo_cache_purge(struct drm_device *dev) mutex_unlock(&vc4->bo_lock); } +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + mutex_lock(&vc4->purgeable.lock); + list_add_tail(&bo->size_head, &vc4->purgeable.list); + vc4->purgeable.num++; + vc4->purgeable.size += bo->base.base.size; + mutex_unlock(&vc4->purgeable.lock); +} + +static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + /* list_del_init() is used here because the caller might release + * the purgeable lock in order to acquire the madv one and update the + * madv status. + * During this short period of time a user might decide to mark + * the BO as unpurgeable, and if bo->madv is set to + * VC4_MADV_DONTNEED it will try to remove the BO from the + * purgeable list which will fail if the ->next/prev fields + * are set to LIST_POISON1/LIST_POISON2 (which is what + * list_del() does). + * Re-initializing the list element guarantees that list_del() + * will work correctly even if it's a NOP. + */ + list_del_init(&bo->size_head); + vc4->purgeable.num--; + vc4->purgeable.size -= bo->base.base.size; +} + +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + mutex_lock(&vc4->purgeable.lock); + vc4_bo_remove_from_purgeable_pool_locked(bo); + mutex_unlock(&vc4->purgeable.lock); +} + +static void vc4_bo_purge(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + struct drm_device *dev = obj->dev; + + WARN_ON(!mutex_is_locked(&bo->madv_lock)); + WARN_ON(bo->madv != VC4_MADV_DONTNEED); + + drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); + + dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr); + bo->base.vaddr = NULL; + bo->madv = __VC4_MADV_PURGED; +} + +static void vc4_bo_userspace_cache_purge(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + mutex_lock(&vc4->purgeable.lock); + while (!list_empty(&vc4->purgeable.list)) { + struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list, + struct vc4_bo, size_head); + struct drm_gem_object *obj = &bo->base.base; + size_t purged_size = 0; + + vc4_bo_remove_from_purgeable_pool_locked(bo); + + /* Release the purgeable lock while we're purging the BO so + * that other people can continue inserting things in the + * purgeable pool without having to wait for all BOs to be + * purged. + */ + mutex_unlock(&vc4->purgeable.lock); + mutex_lock(&bo->madv_lock); + + /* Since we released the purgeable pool lock before acquiring + * the BO madv one, the user may have marked the BO as WILLNEED + * and re-used it in the meantime. + * Before purging the BO we need to make sure + * - it is still marked as DONTNEED + * - it has not been re-inserted in the purgeable list + * - it is not used by HW blocks + * If one of these conditions is not met, just skip the entry. + */ + if (bo->madv == VC4_MADV_DONTNEED && + list_empty(&bo->size_head) && + !refcount_read(&bo->usecnt)) { + purged_size = bo->base.base.size; + vc4_bo_purge(obj); + } + mutex_unlock(&bo->madv_lock); + mutex_lock(&vc4->purgeable.lock); + + if (purged_size) { + vc4->purgeable.purged_size += purged_size; + vc4->purgeable.purged_num++; + } + } + mutex_unlock(&vc4->purgeable.lock); +} + static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, uint32_t size, enum vc4_kernel_bo_type type) @@ -293,6 +418,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) if (!bo) return ERR_PTR(-ENOMEM); + bo->madv = VC4_MADV_WILLNEED; + refcount_set(&bo->usecnt, 0); + mutex_init(&bo->madv_lock); mutex_lock(&vc4->bo_lock); bo->label = VC4_BO_TYPE_KERNEL; vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++; @@ -330,16 +458,38 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, * CMA allocations we've got laying around and try again. */ vc4_bo_cache_purge(dev); + cma_obj = drm_gem_cma_create(dev, size); + } + if (IS_ERR(cma_obj)) { + /* + * Still not enough CMA memory, purge the userspace BO + * cache and retry. + * This is sub-optimal since we purge the whole userspace + * BO cache which forces user that want to re-use the BO to + * restore its initial content. + * Ideally, we should purge entries one by one and retry + * after each to see if CMA allocation succeeds. Or even + * better, try to find an entry with at least the same + * size. + */ + vc4_bo_userspace_cache_purge(dev); cma_obj = drm_gem_cma_create(dev, size); - if (IS_ERR(cma_obj)) { - DRM_ERROR("Failed to allocate from CMA:\n"); - vc4_bo_stats_dump(vc4); - return ERR_PTR(-ENOMEM); - } + } + + if (IS_ERR(cma_obj)) { + DRM_ERROR("Failed to allocate from CMA:\n"); + vc4_bo_stats_dump(vc4); + return ERR_PTR(-ENOMEM); } bo = to_vc4_bo(&cma_obj->base); + /* By default, BOs do not support the MADV ioctl. This will be enabled + * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB + * BOs). + */ + bo->madv = __VC4_MADV_NOTSUPP; + mutex_lock(&vc4->bo_lock); vc4_bo_set_label(&cma_obj->base, type); mutex_unlock(&vc4->bo_lock); @@ -365,6 +515,8 @@ int vc4_dumb_create(struct drm_file *file_priv, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); drm_gem_object_put_unlocked(&bo->base.base); @@ -403,6 +555,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo) struct vc4_bo *bo = to_vc4_bo(gem_bo); struct list_head *cache_list; + /* Remove the BO from the purgeable list. */ + mutex_lock(&bo->madv_lock); + if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt)) + vc4_bo_remove_from_purgeable_pool(bo); + mutex_unlock(&bo->madv_lock); + mutex_lock(&vc4->bo_lock); /* If the object references someone else's memory, we can't cache it. */ @@ -418,7 +576,8 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } /* If this object was partially constructed but CMA allocation - * had failed, just free it. + * had failed, just free it. Can also happen when the BO has been + * purged. */ if (!bo->base.vaddr) { vc4_bo_destroy(bo); @@ -437,6 +596,10 @@ void vc4_free_object(struct drm_gem_object *gem_bo) bo->validated_shader = NULL; } + /* Reset madv and usecnt before adding the BO to the cache. */ + bo->madv = __VC4_MADV_NOTSUPP; + refcount_set(&bo->usecnt, 0); + bo->t_format = false; bo->free_time = jiffies; list_add(&bo->size_head, cache_list); @@ -461,6 +624,56 @@ static void vc4_bo_cache_time_work(struct work_struct *work) mutex_unlock(&vc4->bo_lock); } +int vc4_bo_inc_usecnt(struct vc4_bo *bo) +{ + int ret; + + /* Fast path: if the BO is already retained by someone, no need to + * check the madv status. + */ + if (refcount_inc_not_zero(&bo->usecnt)) + return 0; + + mutex_lock(&bo->madv_lock); + switch (bo->madv) { + case VC4_MADV_WILLNEED: + refcount_inc(&bo->usecnt); + ret = 0; + break; + case VC4_MADV_DONTNEED: + /* We shouldn't use a BO marked as purgeable if at least + * someone else retained its content by incrementing usecnt. + * Luckily the BO hasn't been purged yet, but something wrong + * is happening here. Just throw an error instead of + * authorizing this use case. + */ + case __VC4_MADV_PURGED: + /* We can't use a purged BO. */ + default: + /* Invalid madv value. */ + ret = -EINVAL; + break; + } + mutex_unlock(&bo->madv_lock); + + return ret; +} + +void vc4_bo_dec_usecnt(struct vc4_bo *bo) +{ + /* Fast path: if the BO is still retained by someone, no need to test + * the madv value. + */ + if (refcount_dec_not_one(&bo->usecnt)) + return; + + mutex_lock(&bo->madv_lock); + if (refcount_dec_and_test(&bo->usecnt) && + bo->madv == VC4_MADV_DONTNEED) + vc4_bo_add_to_purgeable_pool(bo); + mutex_unlock(&bo->madv_lock); +} + static void vc4_bo_cache_time_timer(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; @@ -480,18 +693,52 @@ struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { struct vc4_bo *bo = to_vc4_bo(obj); + struct dma_buf *dmabuf; + int ret; if (bo->validated_shader) { DRM_DEBUG("Attempting to export shader BO\n"); return ERR_PTR(-EINVAL); } - return drm_gem_prime_export(dev, obj, flags); + /* Note: as soon as the BO is exported it becomes unpurgeable, because + * noone ever decrements the usecnt even if the reference held by the + * exported BO is released. This shouldn't be a problem since we don't + * expect exported BOs to be marked as purgeable. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) { + DRM_ERROR("Failed to increment BO usecnt\n"); + return ERR_PTR(ret); + } + + dmabuf = drm_gem_prime_export(dev, obj, flags); + if (IS_ERR(dmabuf)) + vc4_bo_dec_usecnt(bo); + + return dmabuf; +} + +int vc4_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct vc4_bo *bo = to_vc4_bo(obj); + + /* The only reason we would end up here is when user-space accesses + * BO's memory after it's been purged. + */ + mutex_lock(&bo->madv_lock); + WARN_ON(bo->madv != __VC4_MADV_PURGED); + mutex_unlock(&bo->madv_lock); + + return VM_FAULT_SIGBUS; } int vc4_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_gem_object *gem_obj; + unsigned long vm_pgoff; struct vc4_bo *bo; int ret; @@ -507,16 +754,36 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } + if (bo->madv != VC4_MADV_WILLNEED) { + DRM_DEBUG("mmaping of %s BO not allowed\n", + bo->madv == VC4_MADV_DONTNEED ? + "purgeable" : "purged"); + return -EINVAL; + } + /* * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map * the whole buffer. */ vma->vm_flags &= ~VM_PFNMAP; - vma->vm_pgoff = 0; + /* This ->vm_pgoff dance is needed to make all parties happy: + * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated + * mem-region, hence the need to set it to zero (the value set by + * the DRM core is a virtual offset encoding the GEM object-id) + * - the mmap() core logic needs ->vm_pgoff to be restored to its + * initial value before returning from this function because it + * encodes the offset of this GEM in the dev->anon_inode pseudo-file + * and this information will be used when we invalidate userspace + * mappings with drm_vma_node_unmap() (called from vc4_gem_purge()). + */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr, bo->base.paddr, vma->vm_end - vma->vm_start); + vma->vm_pgoff = vm_pgoff; + if (ret) drm_gem_vm_close(vma); @@ -580,6 +847,8 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); drm_gem_object_put_unlocked(&bo->base.base); @@ -633,6 +902,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + if (copy_from_user(bo->base.vaddr, (void __user *)(uintptr_t)args->data, args->size)) { diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c index 519cefef800d..72c9dbd81d7f 100644 --- a/drivers/gpu/drm/vc4/vc4_dpi.c +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -97,8 +97,6 @@ struct vc4_dpi { struct drm_encoder *encoder; struct drm_connector *connector; - struct drm_bridge *bridge; - bool is_panel_bridge; void __iomem *regs; @@ -251,10 +249,11 @@ static int vc4_dpi_init_bridge(struct vc4_dpi *dpi) { struct device *dev = &dpi->pdev->dev; struct drm_panel *panel; + struct drm_bridge *bridge; int ret; ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, - &panel, &dpi->bridge); + &panel, &bridge); if (ret) { /* If nothing was connected in the DT, that's not an * error. @@ -265,13 +264,10 @@ static int vc4_dpi_init_bridge(struct vc4_dpi *dpi) return ret; } - if (panel) { - dpi->bridge = drm_panel_bridge_add(panel, - DRM_MODE_CONNECTOR_DPI); - dpi->is_panel_bridge = true; - } + if (panel) + bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DPI); - return drm_bridge_attach(dpi->encoder, dpi->bridge, NULL); + return drm_bridge_attach(dpi->encoder, bridge, NULL); } static int vc4_dpi_bind(struct device *dev, struct device *master, void *data) @@ -352,8 +348,7 @@ static void vc4_dpi_unbind(struct device *dev, struct device *master, struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_dpi *dpi = dev_get_drvdata(dev); - if (dpi->is_panel_bridge) - drm_panel_bridge_remove(dpi->bridge); + drm_of_panel_bridge_remove(dev->of_node, 0, 0); drm_encoder_cleanup(dpi->encoder); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 1c96edcb302b..e3c29729da2e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -100,6 +100,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: + case DRM_VC4_PARAM_SUPPORTS_MADVISE: args->value = true; break; default: @@ -117,6 +118,12 @@ static void vc4_lastclose(struct drm_device *dev) drm_fbdev_cma_restore_mode(vc4->fbdev); } +static const struct vm_operations_struct vc4_vm_ops = { + .fault = vc4_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + static const struct file_operations vc4_drm_fops = { .owner = THIS_MODULE, .open = drm_open, @@ -142,6 +149,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -166,7 +174,7 @@ static struct drm_driver vc4_drm_driver = { .gem_create_object = vc4_create_object, .gem_free_object_unlocked = vc4_free_object, - .gem_vm_ops = &drm_gem_cma_vm_ops, + .gem_vm_ops = &vc4_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 87f2d8e5c134..9c0d380c96f2 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -74,6 +74,19 @@ struct vc4_dev { /* Protects bo_cache and bo_labels. */ struct mutex bo_lock; + /* Purgeable BO pool. All BOs in this pool can have their memory + * reclaimed if the driver is unable to allocate new BOs. We also + * keep stats related to the purge mechanism here. + */ + struct { + struct list_head list; + unsigned int num; + size_t size; + unsigned int purged_num; + size_t purged_size; + struct mutex lock; + } purgeable; + uint64_t dma_fence_context; /* Sequence number for the last job queued in bin_job_list. @@ -192,6 +205,16 @@ struct vc4_bo { * for user-allocated labels. */ int label; + + /* Count the number of active users. This is needed to determine + * whether we can move the BO to the purgeable list or not (when the BO + * is used by the GPU or the display engine we can't purge it). + */ + refcount_t usecnt; + + /* Store purgeable/purged state here */ + u32 madv; + struct mutex madv_lock; }; static inline struct vc4_bo * @@ -503,6 +526,7 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_label_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_fault(struct vm_fault *vmf); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); @@ -513,6 +537,10 @@ void *vc4_prime_vmap(struct drm_gem_object *obj); int vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); +int vc4_bo_inc_usecnt(struct vc4_bo *bo); +void vc4_bo_dec_usecnt(struct vc4_bo *bo); +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo); +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo); /* vc4_crtc.c */ extern struct platform_driver vc4_crtc_driver; @@ -557,6 +585,8 @@ void vc4_job_handle_completed(struct vc4_dev *vc4); int vc4_queue_seqno_cb(struct drm_device *dev, struct vc4_seqno_cb *cb, uint64_t seqno, void (*func)(struct vc4_seqno_cb *cb)); +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* vc4_hdmi.c */ extern struct platform_driver vc4_hdmi_driver; diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index d1e0dc908048..94085f8bcd68 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -33,6 +33,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> #include <drm/drm_mipi_dsi.h> +#include <drm/drm_of.h> #include <drm/drm_panel.h> #include <linux/clk.h> #include <linux/clk-provider.h> @@ -504,7 +505,6 @@ struct vc4_dsi { struct mipi_dsi_host dsi_host; struct drm_encoder *encoder; struct drm_bridge *bridge; - bool is_panel_bridge; void __iomem *regs; @@ -859,14 +859,11 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, pll_clock = parent_rate / divider; pixel_clock_hz = pll_clock / dsi->divider; - /* Round up the clk_set_rate() request slightly, since - * PLLD_DSI1 is an integer divider and its rate selection will - * never round up. - */ - adjusted_mode->clock = pixel_clock_hz / 1000 + 1; + adjusted_mode->clock = pixel_clock_hz / 1000; /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */ - adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal); + adjusted_mode->htotal = adjusted_mode->clock * mode->htotal / + mode->clock; adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal; adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal; @@ -900,7 +897,11 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) vc4_dsi_dump_regs(dsi); } - phy_clock = pixel_clock_hz * dsi->divider; + /* Round up the clk_set_rate() request slightly, since + * PLLD_DSI1 is an integer divider and its rate selection will + * never round up. + */ + phy_clock = (pixel_clock_hz + 1000) * dsi->divider; ret = clk_set_rate(dsi->pll_phy_clock, phy_clock); if (ret) { dev_err(&dsi->pdev->dev, @@ -1288,7 +1289,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct vc4_dsi *dsi = host_to_dsi(host); - int ret = 0; dsi->lanes = device->lanes; dsi->channel = device->channel; @@ -1323,34 +1323,12 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, return 0; } - dsi->bridge = of_drm_find_bridge(device->dev.of_node); - if (!dsi->bridge) { - struct drm_panel *panel = - of_drm_find_panel(device->dev.of_node); - - dsi->bridge = drm_panel_bridge_add(panel, - DRM_MODE_CONNECTOR_DSI); - if (IS_ERR(dsi->bridge)) { - ret = PTR_ERR(dsi->bridge); - dsi->bridge = NULL; - return ret; - } - dsi->is_panel_bridge = true; - } - - return drm_bridge_attach(dsi->encoder, dsi->bridge, NULL); + return 0; } static int vc4_dsi_host_detach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { - struct vc4_dsi *dsi = host_to_dsi(host); - - if (dsi->is_panel_bridge) { - drm_panel_bridge_remove(dsi->bridge); - dsi->bridge = NULL; - } - return 0; } @@ -1382,6 +1360,27 @@ static void dsi_handle_error(struct vc4_dsi *dsi, *ret = IRQ_HANDLED; } +/* + * Initial handler for port 1 where we need the reg_dma workaround. + * The register DMA writes sleep, so we can't do it in the top half. + * Instead we use IRQF_ONESHOT so that the IRQ gets disabled in the + * parent interrupt contrller until our interrupt thread is done. + */ +static irqreturn_t vc4_dsi_irq_defer_to_thread_handler(int irq, void *data) +{ + struct vc4_dsi *dsi = data; + u32 stat = DSI_PORT_READ(INT_STAT); + + if (!stat) + return IRQ_NONE; + + return IRQ_WAKE_THREAD; +} + +/* + * Normal IRQ handler for port 0, or the threaded IRQ handler for port + * 1 where we need the reg_dma workaround. + */ static irqreturn_t vc4_dsi_irq_handler(int irq, void *data) { struct vc4_dsi *dsi = data; @@ -1492,16 +1491,13 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = to_vc4_dev(drm); - struct vc4_dsi *dsi; + struct vc4_dsi *dsi = dev_get_drvdata(dev); struct vc4_dsi_encoder *vc4_dsi_encoder; + struct drm_panel *panel; const struct of_device_id *match; dma_cap_mask_t dma_mask; int ret; - dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); - if (!dsi) - return -ENOMEM; - match = of_match_device(vc4_dsi_dt_match, dev); if (!match) return -ENODEV; @@ -1516,7 +1512,6 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) vc4_dsi_encoder->dsi = dsi; dsi->encoder = &vc4_dsi_encoder->base.base; - dsi->pdev = pdev; dsi->regs = vc4_ioremap_regs(pdev, 0); if (IS_ERR(dsi->regs)) return PTR_ERR(dsi->regs); @@ -1565,8 +1560,15 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) /* Clear any existing interrupt state. */ DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT)); - ret = devm_request_irq(dev, platform_get_irq(pdev, 0), - vc4_dsi_irq_handler, 0, "vc4 dsi", dsi); + if (dsi->reg_dma_mem) + ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0), + vc4_dsi_irq_defer_to_thread_handler, + vc4_dsi_irq_handler, + IRQF_ONESHOT, + "vc4 dsi", dsi); + else + ret = devm_request_irq(dev, platform_get_irq(pdev, 0), + vc4_dsi_irq_handler, 0, "vc4 dsi", dsi); if (ret) { if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to get interrupt: %d\n", ret); @@ -1597,6 +1599,18 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) return ret; } + ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, + &panel, &dsi->bridge); + if (ret) + return ret; + + if (panel) { + dsi->bridge = devm_drm_panel_bridge_add(dev, panel, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(dsi->bridge)) + return PTR_ERR(dsi->bridge); + } + /* The esc clock rate is supposed to always be 100Mhz. */ ret = clk_set_rate(dsi->escape_clock, 100 * 1000000); if (ret) { @@ -1615,12 +1629,11 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) DRM_MODE_ENCODER_DSI, NULL); drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs); - dsi->dsi_host.ops = &vc4_dsi_host_ops; - dsi->dsi_host.dev = dev; - - mipi_dsi_host_register(&dsi->dsi_host); - - dev_set_drvdata(dev, dsi); + ret = drm_bridge_attach(dsi->encoder, dsi->bridge, NULL); + if (ret) { + dev_err(dev, "bridge attach failed: %d\n", ret); + return ret; + } pm_runtime_enable(dev); @@ -1638,8 +1651,6 @@ static void vc4_dsi_unbind(struct device *dev, struct device *master, vc4_dsi_encoder_destroy(dsi->encoder); - mipi_dsi_host_unregister(&dsi->dsi_host); - if (dsi->port == 1) vc4->dsi1 = NULL; } @@ -1651,12 +1662,47 @@ static const struct component_ops vc4_dsi_ops = { static int vc4_dsi_dev_probe(struct platform_device *pdev) { - return component_add(&pdev->dev, &vc4_dsi_ops); + struct device *dev = &pdev->dev; + struct vc4_dsi *dsi; + int ret; + + dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + dev_set_drvdata(dev, dsi); + + dsi->pdev = pdev; + + /* Note, the initialization sequence for DSI and panels is + * tricky. The component bind above won't get past its + * -EPROBE_DEFER until the panel/bridge probes. The + * panel/bridge will return -EPROBE_DEFER until it has a + * mipi_dsi_host to register its device to. So, we register + * the host during pdev probe time, so vc4 as a whole can then + * -EPROBE_DEFER its component bind process until the panel + * successfully attaches. + */ + dsi->dsi_host.ops = &vc4_dsi_host_ops; + dsi->dsi_host.dev = dev; + mipi_dsi_host_register(&dsi->dsi_host); + + ret = component_add(&pdev->dev, &vc4_dsi_ops); + if (ret) { + mipi_dsi_host_unregister(&dsi->dsi_host); + return ret; + } + + return 0; } static int vc4_dsi_dev_remove(struct platform_device *pdev) { + struct device *dev = &pdev->dev; + struct vc4_dsi *dsi = dev_get_drvdata(dev); + component_del(&pdev->dev, &vc4_dsi_ops); + mipi_dsi_host_unregister(&dsi->dsi_host); + return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index d0c6bfb68c4e..e00ac2f3a264 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *dev) continue; for (j = 0; j < exec[i]->bo_count; j++) { + bo = to_vc4_bo(&exec[i]->bo[j]->base); + + /* Retain BOs just in case they were marked purgeable. + * This prevents the BO from being purged before + * someone had a chance to dump the hang state. + */ + WARN_ON(!refcount_read(&bo->usecnt)); + refcount_inc(&bo->usecnt); drm_gem_object_get(&exec[i]->bo[j]->base); kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; } list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { + /* No need to retain BOs coming from the ->unref_list + * because they are naturally unpurgeable. + */ drm_gem_object_get(&bo->base.base); kernel_state->bo[j + prev_idx] = &bo->base.base; j++; @@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *dev) state->fdbgs = V3D_READ(V3D_FDBGS); state->errstat = V3D_READ(V3D_ERRSTAT); + /* We need to turn purgeable BOs into unpurgeable ones so that + * userspace has a chance to dump the hang state before the kernel + * decides to purge those BOs. + * Note that BO consistency at dump time cannot be guaranteed. For + * example, if the owner of these BOs decides to re-use them or mark + * them purgeable again there's nothing we can do to prevent it. + */ + for (i = 0; i < kernel_state->user_state.bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]); + + if (bo->madv == __VC4_MADV_NOTSUPP) + continue; + + mutex_lock(&bo->madv_lock); + if (!WARN_ON(bo->madv == __VC4_MADV_PURGED)) + bo->madv = VC4_MADV_WILLNEED; + refcount_dec(&bo->usecnt); + mutex_unlock(&bo->madv_lock); + } + spin_lock_irqsave(&vc4->job_lock, irqflags); if (vc4->hang_state) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); @@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, * The command validator needs to reference BOs by their index within * the submitted job's BO list. This does the validation of the job's * BO list and reference counting for the lifetime of the job. - * - * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at vc4_complete_exec() time. */ static int vc4_cl_lookup_bos(struct drm_device *dev, @@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev, DRM_DEBUG("Failed to look up GEM BO %d: %d\n", i, handles[i]); ret = -EINVAL; - spin_unlock(&file_priv->table_lock); - goto fail; + break; } + drm_gem_object_get(bo); exec->bo[i] = (struct drm_gem_cma_object *)bo; } spin_unlock(&file_priv->table_lock); + if (ret) + goto fail_put_bo; + + for (i = 0; i < exec->bo_count; i++) { + ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base)); + if (ret) + goto fail_dec_usecnt; + } + + kvfree(handles); + return 0; + +fail_dec_usecnt: + /* Decrease usecnt on acquired objects. + * We cannot rely on vc4_complete_exec() to release resources here, + * because vc4_complete_exec() has no information about which BO has + * had its ->usecnt incremented. + * To make things easier we just free everything explicitly and set + * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release' + * step. + */ + for (i-- ; i >= 0; i--) + vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base)); + +fail_put_bo: + /* Release any reference to acquired objects. */ + for (i = 0; i < exec->bo_count && exec->bo[i]; i++) + drm_gem_object_put_unlocked(&exec->bo[i]->base); + fail: kvfree(handles); + kvfree(exec->bo); + exec->bo = NULL; return ret; } @@ -833,8 +892,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) dma_fence_signal(exec->fence); if (exec->bo) { - for (i = 0; i < exec->bo_count; i++) + for (i = 0; i < exec->bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + + vc4_bo_dec_usecnt(bo); drm_gem_object_put_unlocked(&exec->bo[i]->base); + } kvfree(exec->bo); } @@ -1098,6 +1161,9 @@ vc4_gem_init(struct drm_device *dev) INIT_WORK(&vc4->job_done_work, vc4_job_done_work); mutex_init(&vc4->power_lock); + + INIT_LIST_HEAD(&vc4->purgeable.list); + mutex_init(&vc4->purgeable.lock); } void @@ -1121,3 +1187,81 @@ vc4_gem_destroy(struct drm_device *dev) if (vc4->hang_state) vc4_free_hang_state(dev, vc4->hang_state); } + +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_gem_madvise *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + int ret; + + switch (args->madv) { + case VC4_MADV_DONTNEED: + case VC4_MADV_WILLNEED: + break; + default: + return -EINVAL; + } + + if (args->pad != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_vc4_bo(gem_obj); + + /* Only BOs exposed to userspace can be purged. */ + if (bo->madv == __VC4_MADV_NOTSUPP) { + DRM_DEBUG("madvise not supported on this BO\n"); + ret = -EINVAL; + goto out_put_gem; + } + + /* Not sure it's safe to purge imported BOs. Let's just assume it's + * not until proven otherwise. + */ + if (gem_obj->import_attach) { + DRM_DEBUG("madvise not supported on imported BOs\n"); + ret = -EINVAL; + goto out_put_gem; + } + + mutex_lock(&bo->madv_lock); + + if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED && + !refcount_read(&bo->usecnt)) { + /* If the BO is about to be marked as purgeable, is not used + * and is not already purgeable or purged, add it to the + * purgeable list. + */ + vc4_bo_add_to_purgeable_pool(bo); + } else if (args->madv == VC4_MADV_WILLNEED && + bo->madv == VC4_MADV_DONTNEED && + !refcount_read(&bo->usecnt)) { + /* The BO has not been purged yet, just remove it from + * the purgeable list. + */ + vc4_bo_remove_from_purgeable_pool(bo); + } + + /* Save the purged state. */ + args->retained = bo->madv != __VC4_MADV_PURGED; + + /* Update internal madv state only if the bo was not purged. */ + if (bo->madv != __VC4_MADV_PURGED) + bo->madv = args->madv; + + mutex_unlock(&bo->madv_lock); + + ret = 0; + +out_put_gem: + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 937da8dd65b8..fa37a1c07cf6 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -309,16 +309,13 @@ static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs = static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev, struct drm_encoder *encoder) { - struct drm_connector *connector = NULL; + struct drm_connector *connector; struct vc4_hdmi_connector *hdmi_connector; - int ret = 0; hdmi_connector = devm_kzalloc(dev->dev, sizeof(*hdmi_connector), GFP_KERNEL); - if (!hdmi_connector) { - ret = -ENOMEM; - goto fail; - } + if (!hdmi_connector) + return ERR_PTR(-ENOMEM); connector = &hdmi_connector->base; hdmi_connector->encoder = encoder; @@ -336,12 +333,6 @@ static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev, drm_mode_connector_attach_encoder(connector, encoder); return connector; - - fail: - if (connector) - vc4_hdmi_connector_destroy(connector); - - return ERR_PTR(ret); } static void vc4_hdmi_encoder_destroy(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 2968b3ebb895..423a23ed8fc2 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -23,6 +23,7 @@ #include <drm/drm_fb_cma_helper.h> #include <drm/drm_plane_helper.h> +#include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" #include "vc4_regs.h" @@ -547,14 +548,24 @@ static int vc4_plane_mode_set(struct drm_plane *plane, tiling = SCALER_CTL0_TILING_LINEAR; pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); break; - case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { + /* For T-tiled, the FB pitch is "how many bytes from + * one row to the next, such that pitch * tile_h == + * tile_size * tiles_per_row." + */ + u32 tile_size_shift = 12; /* T tiles are 4kb */ + u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ + u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); + tiling = SCALER_CTL0_TILING_256B_OR_T; - pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET), - VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L), - VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5, - SCALER_PITCH0_TILE_WIDTH_R)); + pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) | + VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) | + VC4_SET_FIELD(tiles_w, SCALER_PITCH0_TILE_WIDTH_R)); break; + } + default: DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", (long long)fb->modifier); @@ -764,21 +775,40 @@ static int vc4_prepare_fb(struct drm_plane *plane, { struct vc4_bo *bo; struct dma_fence *fence; + int ret; if ((plane->state->fb == state->fb) || !state->fb) return 0; bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); + + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + fence = reservation_object_get_excl_rcu(bo->resv); drm_atomic_set_fence_for_plane(state, fence); return 0; } +static void vc4_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_bo *bo; + + if (plane->state->fb == state->fb || !state->fb) + return; + + bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); + vc4_bo_dec_usecnt(bo); +} + static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_check = vc4_plane_atomic_check, .atomic_update = vc4_plane_atomic_update, .prepare_fb = vc4_prepare_fb, + .cleanup_fb = vc4_cleanup_fb, }; static void vc4_plane_destroy(struct drm_plane *plane) diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h index ad7b1ea720c2..deafb32923e1 100644 --- a/drivers/gpu/drm/vc4/vc4_trace.h +++ b/drivers/gpu/drm/vc4/vc4_trace.h @@ -59,5 +59,5 @@ TRACE_EVENT(vc4_wait_for_seqno_end, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/vc4 #include <trace/define_trace.h> |