diff options
Diffstat (limited to 'drivers/gpu/drm/panfrost')
-rw-r--r-- | drivers/gpu/drm/panfrost/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_device.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_device.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_drv.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_gpu.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 329 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_perfcnt.h | 18 | ||||
-rw-r--r-- | drivers/gpu/drm/panfrost/panfrost_regs.h | 19 |
8 files changed, 390 insertions, 1 deletions
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile index 6de72d13c58f..ecf0864cb515 100644 --- a/drivers/gpu/drm/panfrost/Makefile +++ b/drivers/gpu/drm/panfrost/Makefile @@ -7,6 +7,7 @@ panfrost-y := \ panfrost_gem.o \ panfrost_gpu.o \ panfrost_job.o \ - panfrost_mmu.o + panfrost_mmu.o \ + panfrost_perfcnt.o obj-$(CONFIG_DRM_PANFROST) += panfrost.o diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index ccb8eb2a518c..8a111d7c0200 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -14,6 +14,7 @@ #include "panfrost_gpu.h" #include "panfrost_job.h" #include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" static int panfrost_reset_init(struct panfrost_device *pfdev) { @@ -171,7 +172,13 @@ int panfrost_device_init(struct panfrost_device *pfdev) pm_runtime_mark_last_busy(pfdev->dev); pm_runtime_put_autosuspend(pfdev->dev); + err = panfrost_perfcnt_init(pfdev); + if (err) + goto err_out5; + return 0; +err_out5: + panfrost_job_fini(pfdev); err_out4: panfrost_mmu_fini(pfdev); err_out3: @@ -187,6 +194,7 @@ err_out0: void panfrost_device_fini(struct panfrost_device *pfdev) { + panfrost_perfcnt_fini(pfdev); panfrost_job_fini(pfdev); panfrost_mmu_fini(pfdev); panfrost_gpu_fini(pfdev); diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index e86581c4af7b..83cc01cafde1 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -14,6 +14,7 @@ struct panfrost_device; struct panfrost_mmu; struct panfrost_job_slot; struct panfrost_job; +struct panfrost_perfcnt; #define NUM_JOB_SLOTS 3 @@ -78,6 +79,8 @@ struct panfrost_device { struct panfrost_job *jobs[NUM_JOB_SLOTS]; struct list_head scheduled_jobs; + struct panfrost_perfcnt *perfcnt; + struct mutex sched_lock; struct mutex reset_lock; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 754881ece8d7..e34e86a7378a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -19,6 +19,7 @@ #include "panfrost_mmu.h" #include "panfrost_job.h" #include "panfrost_gpu.h" +#include "panfrost_perfcnt.h" static bool unstable_ioctls; module_param_unsafe(unstable_ioctls, bool, 0600); @@ -329,6 +330,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file) { struct panfrost_file_priv *panfrost_priv = file->driver_priv; + panfrost_perfcnt_close(panfrost_priv); panfrost_job_close(panfrost_priv); kfree(panfrost_priv); @@ -348,6 +350,8 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(MMAP_BO, mmap_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(GET_PARAM, get_param, DRM_RENDER_ALLOW), PANFROST_IOCTL(GET_BO_OFFSET, get_bo_offset, DRM_RENDER_ALLOW), + PANFROST_IOCTL(PERFCNT_ENABLE, perfcnt_enable, DRM_RENDER_ALLOW), + PANFROST_IOCTL(PERFCNT_DUMP, perfcnt_dump, DRM_RENDER_ALLOW), }; DEFINE_DRM_GEM_SHMEM_FOPS(panfrost_drm_driver_fops); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 6e68a100291c..20ab333fc925 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -15,6 +15,7 @@ #include "panfrost_features.h" #include "panfrost_issues.h" #include "panfrost_gpu.h" +#include "panfrost_perfcnt.h" #include "panfrost_regs.h" static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) @@ -40,6 +41,12 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) gpu_write(pfdev, GPU_INT_MASK, 0); } + if (state & GPU_IRQ_PERFCNT_SAMPLE_COMPLETED) + panfrost_perfcnt_sample_done(pfdev); + + if (state & GPU_IRQ_CLEAN_CACHES_COMPLETED) + panfrost_perfcnt_clean_cache_done(pfdev); + gpu_write(pfdev, GPU_INT_CLEAR, state); return IRQ_HANDLED; diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c new file mode 100644 index 000000000000..83c57d325ca8 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Collabora Ltd */ + +#include <drm/drm_file.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/panfrost_drm.h> +#include <linux/completion.h> +#include <linux/iopoll.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include "panfrost_device.h" +#include "panfrost_features.h" +#include "panfrost_gem.h" +#include "panfrost_issues.h" +#include "panfrost_job.h" +#include "panfrost_mmu.h" +#include "panfrost_regs.h" + +#define COUNTERS_PER_BLOCK 64 +#define BYTES_PER_COUNTER 4 +#define BLOCKS_PER_COREGROUP 8 +#define V4_SHADERS_PER_COREGROUP 4 + +struct panfrost_perfcnt { + struct panfrost_gem_object *bo; + size_t bosize; + void *buf; + struct panfrost_file_priv *user; + struct mutex lock; + struct completion dump_comp; +}; + +void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev) +{ + complete(&pfdev->perfcnt->dump_comp); +} + +void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev) +{ + gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES); +} + +static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) +{ + u64 gpuva; + int ret; + + reinit_completion(&pfdev->perfcnt->dump_comp); + gpuva = pfdev->perfcnt->bo->node.start << PAGE_SHIFT; + gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva); + gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32); + gpu_write(pfdev, GPU_INT_CLEAR, + GPU_IRQ_CLEAN_CACHES_COMPLETED | + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); + gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE); + ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp, + msecs_to_jiffies(1000)); + if (!ret) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = 0; + + return ret; +} + +static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, + struct panfrost_file_priv *user, + unsigned int counterset) +{ + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct drm_gem_shmem_object *bo; + u32 cfg; + int ret; + + if (user == perfcnt->user) + return 0; + else if (perfcnt->user) + return -EBUSY; + + ret = pm_runtime_get_sync(pfdev->dev); + if (ret < 0) + return ret; + + bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + perfcnt->bo = to_panfrost_bo(&bo->base); + + /* Map the perfcnt buf in the address space attached to file_priv. */ + ret = panfrost_mmu_map(perfcnt->bo); + if (ret) + goto err_put_bo; + + perfcnt->buf = drm_gem_shmem_vmap(&bo->base); + if (IS_ERR(perfcnt->buf)) { + ret = PTR_ERR(perfcnt->buf); + goto err_put_bo; + } + + /* + * Invalidate the cache and clear the counters to start from a fresh + * state. + */ + reinit_completion(&pfdev->perfcnt->dump_comp); + gpu_write(pfdev, GPU_INT_CLEAR, + GPU_IRQ_CLEAN_CACHES_COMPLETED | + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); + gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR); + gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES); + ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp, + msecs_to_jiffies(1000)); + if (!ret) { + ret = -ETIMEDOUT; + goto err_vunmap; + } + + perfcnt->user = user; + + /* + * Always use address space 0 for now. + * FIXME: this needs to be updated when we start using different + * address space. + */ + cfg = GPU_PERFCNT_CFG_AS(0) | + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); + + /* + * Bifrost GPUs have 2 set of counters, but we're only interested by + * the first one for now. + */ + if (panfrost_model_is_bifrost(pfdev)) + cfg |= GPU_PERFCNT_CFG_SETSEL(counterset); + + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff); + + /* + * Due to PRLAM-8186 we need to disable the Tiler before we enable HW + * counters. + */ + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); + else + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); + + gpu_write(pfdev, GPU_PERFCNT_CFG, cfg); + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); + + return 0; + +err_vunmap: + drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf); +err_put_bo: + drm_gem_object_put_unlocked(&bo->base); + return ret; +} + +static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, + struct panfrost_file_priv *user) +{ + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + + if (user != perfcnt->user) + return -EINVAL; + + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0); + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); + gpu_write(pfdev, GPU_PERFCNT_CFG, + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); + + perfcnt->user = NULL; + drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf); + perfcnt->buf = NULL; + drm_gem_object_put_unlocked(&perfcnt->bo->base.base); + perfcnt->bo = NULL; + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); + + return 0; +} + +int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_file_priv *pfile = file_priv->driver_priv; + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct drm_panfrost_perfcnt_enable *req = data; + int ret; + + ret = panfrost_unstable_ioctl_check(); + if (ret) + return ret; + + /* Only Bifrost GPUs have 2 set of counters. */ + if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0)) + return -EINVAL; + + mutex_lock(&perfcnt->lock); + if (req->enable) + ret = panfrost_perfcnt_enable_locked(pfdev, pfile, + req->counterset); + else + ret = panfrost_perfcnt_disable_locked(pfdev, pfile); + mutex_unlock(&perfcnt->lock); + + return ret; +} + +int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct drm_panfrost_perfcnt_dump *req = data; + void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr; + int ret; + + ret = panfrost_unstable_ioctl_check(); + if (ret) + return ret; + + mutex_lock(&perfcnt->lock); + if (perfcnt->user != file_priv->driver_priv) { + ret = -EINVAL; + goto out; + } + + ret = panfrost_perfcnt_dump_locked(pfdev); + if (ret) + goto out; + + if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize)) + ret = -EFAULT; + +out: + mutex_unlock(&perfcnt->lock); + + return ret; +} + +void panfrost_perfcnt_close(struct panfrost_file_priv *pfile) +{ + struct panfrost_device *pfdev = pfile->pfdev; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + + pm_runtime_get_sync(pfdev->dev); + mutex_lock(&perfcnt->lock); + if (perfcnt->user == pfile) + panfrost_perfcnt_disable_locked(pfdev, pfile); + mutex_unlock(&perfcnt->lock); + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); +} + +int panfrost_perfcnt_init(struct panfrost_device *pfdev) +{ + struct panfrost_perfcnt *perfcnt; + size_t size; + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) { + unsigned int ncoregroups; + + ncoregroups = hweight64(pfdev->features.l2_present); + size = ncoregroups * BLOCKS_PER_COREGROUP * + COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; + } else { + unsigned int nl2c, ncores; + + /* + * TODO: define a macro to extract the number of l2 caches from + * mem_features. + */ + nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1; + + /* + * shader_present might be sparse, but the counters layout + * forces to dump unused regions too, hence the fls64() call + * instead of hweight64(). + */ + ncores = fls64(pfdev->features.shader_present); + + /* + * There's always one JM and one Tiler block, hence the '+ 2' + * here. + */ + size = (nl2c + ncores + 2) * + COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; + } + + perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL); + if (!perfcnt) + return -ENOMEM; + + perfcnt->bosize = size; + + /* Start with everything disabled. */ + gpu_write(pfdev, GPU_PERFCNT_CFG, + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); + + init_completion(&perfcnt->dump_comp); + mutex_init(&perfcnt->lock); + pfdev->perfcnt = perfcnt; + + return 0; +} + +void panfrost_perfcnt_fini(struct panfrost_device *pfdev) +{ + /* Disable everything before leaving. */ + gpu_write(pfdev, GPU_PERFCNT_CFG, + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); +} diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h new file mode 100644 index 000000000000..13b8fdaa1b43 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Collabora Ltd */ +#ifndef __PANFROST_PERFCNT_H__ +#define __PANFROST_PERFCNT_H__ + +#include "panfrost_device.h" + +void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev); +void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev); +int panfrost_perfcnt_init(struct panfrost_device *pfdev); +void panfrost_perfcnt_fini(struct panfrost_device *pfdev); +void panfrost_perfcnt_close(struct panfrost_file_priv *pfile); +int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index 42d08860fd76..ea38ac60581c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -44,12 +44,31 @@ GPU_IRQ_MULTIPLE_FAULT) #define GPU_CMD 0x30 #define GPU_CMD_SOFT_RESET 0x01 +#define GPU_CMD_PERFCNT_CLEAR 0x03 +#define GPU_CMD_PERFCNT_SAMPLE 0x04 +#define GPU_CMD_CLEAN_CACHES 0x07 +#define GPU_CMD_CLEAN_INV_CACHES 0x08 #define GPU_STATUS 0x34 +#define GPU_STATUS_PRFCNT_ACTIVE BIT(2) #define GPU_LATEST_FLUSH_ID 0x38 #define GPU_FAULT_STATUS 0x3C #define GPU_FAULT_ADDRESS_LO 0x40 #define GPU_FAULT_ADDRESS_HI 0x44 +#define GPU_PERFCNT_BASE_LO 0x60 +#define GPU_PERFCNT_BASE_HI 0x64 +#define GPU_PERFCNT_CFG 0x68 +#define GPU_PERFCNT_CFG_MODE(x) (x) +#define GPU_PERFCNT_CFG_MODE_OFF 0 +#define GPU_PERFCNT_CFG_MODE_MANUAL 1 +#define GPU_PERFCNT_CFG_MODE_TILE 2 +#define GPU_PERFCNT_CFG_AS(x) ((x) << 4) +#define GPU_PERFCNT_CFG_SETSEL(x) ((x) << 8) +#define GPU_PRFCNT_JM_EN 0x6c +#define GPU_PRFCNT_SHADER_EN 0x70 +#define GPU_PRFCNT_TILER_EN 0x74 +#define GPU_PRFCNT_MMU_L2_EN 0x7c + #define GPU_THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ #define GPU_THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ #define GPU_THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ |