From a5fc7aa901b6818c67cc7e5cef8201fbaab8fa1b Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 15:08:18 -0400 Subject: drm/msm: replace MSM_BO_UNCACHED with MSM_BO_WC for internal objects msm_gem_get_vaddr() currently always maps as writecombine, so use the right flag instead of relying on broken behavior (things don't actually work if they are mapped as uncached). Signed-off-by: Jonathan Marek Acked-by: Jordan Crouse Link: https://lore.kernel.org/r/20210423190833.25319-3-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index cf897297656f..8fd0777f2dc9 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -391,7 +391,7 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, void *ptr; ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4, - MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); + MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); if (IS_ERR(ptr)) return ERR_CAST(ptr); -- cgit v1.2.3-70-g09d2 From 11120e9351d809b39a92f0e6e9b7e7848d4de98b Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 14 Mar 2021 19:34:04 +0300 Subject: drm/msm: Convert to use resource-managed OPP API Use resource-managed OPP API to simplify code. Signed-off-by: Yangtao Li Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210314163408.22292-12-digetx@gmail.com Reviewed-by: Viresh Kumar Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 11 +++-------- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 -- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 23 ++++++++--------------- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 2 -- drivers/gpu/drm/msm/dp/dp_ctrl.c | 30 ++++++------------------------ drivers/gpu/drm/msm/dp/dp_ctrl.h | 1 - drivers/gpu/drm/msm/dp/dp_display.c | 5 +---- drivers/gpu/drm/msm/dsi/dsi_host.c | 13 ++++--------- 11 files changed, 25 insertions(+), 68 deletions(-) (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c') diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index eb0f884eaf30..f46562c12022 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1705,7 +1705,7 @@ static void check_speed_bin(struct device *dev) nvmem_cell_put(cell); } - dev_pm_opp_set_supported_hw(dev, &val, 1); + devm_pm_opp_set_supported_hw(dev, &val, 1); } struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 3d55e153fa9c..0517fead9319 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1346,7 +1346,7 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) * The GMU handles its own frequency switching so build a list of * available frequencies to send during initialization */ - ret = dev_pm_opp_of_add_table(gmu->dev); + ret = devm_pm_opp_of_add_table(gmu->dev); if (ret) { DRM_DEV_ERROR(gmu->dev, "Unable to set the OPP table for the GMU\n"); return ret; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 0b9713c2ff60..540d14e7cdae 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1339,9 +1339,6 @@ static void a6xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); - if (a6xx_gpu->opp_table) - dev_pm_opp_put_supported_hw(a6xx_gpu->opp_table); - kfree(a6xx_gpu); } @@ -1474,7 +1471,6 @@ static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse) static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, u32 revn) { - struct opp_table *opp_table; u32 supp_hw = UINT_MAX; u16 speedbin; int ret; @@ -1497,11 +1493,10 @@ static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, supp_hw = fuse_to_supp_hw(dev, revn, speedbin); done: - opp_table = dev_pm_opp_set_supported_hw(dev, &supp_hw, 1); - if (IS_ERR(opp_table)) - return PTR_ERR(opp_table); + ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); + if (ret) + return ret; - a6xx_gpu->opp_table = opp_table; return 0; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index bb544dfe5737..0bc2d062f54a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -33,8 +33,6 @@ struct a6xx_gpu { void *llc_slice; void *htw_llc_slice; bool have_mmu500; - - struct opp_table *opp_table; }; #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 8fd0777f2dc9..c1b02f790804 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -841,7 +841,7 @@ static void adreno_get_pwrlevels(struct device *dev, if (!of_find_property(dev->of_node, "operating-points-v2", NULL)) ret = adreno_get_legacy_pwrlevels(dev); else { - ret = dev_pm_opp_of_add_table(dev); + ret = devm_pm_opp_of_add_table(dev); if (ret) DRM_DEV_ERROR(dev, "Unable to set the OPP table\n"); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index e500a9294528..1d3a4f395e74 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1141,21 +1141,21 @@ static int dpu_bind(struct device *dev, struct device *master, void *data) if (!dpu_kms) return -ENOMEM; - dpu_kms->opp_table = dev_pm_opp_set_clkname(dev, "core"); - if (IS_ERR(dpu_kms->opp_table)) - return PTR_ERR(dpu_kms->opp_table); + ret = devm_pm_opp_set_clkname(dev, "core"); + if (ret) + return ret; /* OPP table is optional */ - ret = dev_pm_opp_of_add_table(dev); + ret = devm_pm_opp_of_add_table(dev); if (ret && ret != -ENODEV) { dev_err(dev, "invalid OPP table in device tree\n"); - goto put_clkname; + return ret; } mp = &dpu_kms->mp; ret = msm_dss_parse_clock(pdev, mp); if (ret) { DPU_ERROR("failed to parse clocks, ret=%d\n", ret); - goto err; + return ret; } platform_set_drvdata(pdev, dpu_kms); @@ -1163,7 +1163,7 @@ static int dpu_bind(struct device *dev, struct device *master, void *data) ret = msm_kms_init(&dpu_kms->base, &kms_funcs); if (ret) { DPU_ERROR("failed to init kms, ret=%d\n", ret); - goto err; + return ret; } dpu_kms->dev = ddev; dpu_kms->pdev = pdev; @@ -1172,11 +1172,7 @@ static int dpu_bind(struct device *dev, struct device *master, void *data) dpu_kms->rpm_enabled = true; priv->kms = &dpu_kms->base; - return ret; -err: - dev_pm_opp_of_remove_table(dev); -put_clkname: - dev_pm_opp_put_clkname(dpu_kms->opp_table); + return ret; } @@ -1192,9 +1188,6 @@ static void dpu_unbind(struct device *dev, struct device *master, void *data) if (dpu_kms->rpm_enabled) pm_runtime_disable(&pdev->dev); - - dev_pm_opp_of_remove_table(dev); - dev_pm_opp_put_clkname(dpu_kms->opp_table); } static const struct component_ops dpu_ops = { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index bbaee4f75c6f..2b23696ee7fd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -128,8 +128,6 @@ struct dpu_kms { struct platform_device *pdev; bool rpm_enabled; - struct opp_table *opp_table; - struct dss_module_power mp; /* reference count bandwidth requests, so we know when we can diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 2a8955ca70d1..6f8ace7890fe 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -77,8 +77,6 @@ struct dp_ctrl_private { struct dp_parser *parser; struct dp_catalog *catalog; - struct opp_table *opp_table; - struct completion idle_comp; struct completion video_comp; }; @@ -1941,20 +1939,17 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link, return ERR_PTR(-ENOMEM); } - ctrl->opp_table = dev_pm_opp_set_clkname(dev, "ctrl_link"); - if (IS_ERR(ctrl->opp_table)) { + ret = devm_pm_opp_set_clkname(dev, "ctrl_link"); + if (ret) { dev_err(dev, "invalid DP OPP table in device tree\n"); - /* caller do PTR_ERR(ctrl->opp_table) */ - return (struct dp_ctrl *)ctrl->opp_table; + /* caller do PTR_ERR(opp_table) */ + return (struct dp_ctrl *)ERR_PTR(ret); } /* OPP table is optional */ - ret = dev_pm_opp_of_add_table(dev); - if (ret) { + ret = devm_pm_opp_of_add_table(dev); + if (ret) dev_err(dev, "failed to add DP OPP table\n"); - dev_pm_opp_put_clkname(ctrl->opp_table); - ctrl->opp_table = NULL; - } init_completion(&ctrl->idle_comp); init_completion(&ctrl->video_comp); @@ -1970,16 +1965,3 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link, return &ctrl->dp_ctrl; } - -void dp_ctrl_put(struct dp_ctrl *dp_ctrl) -{ - struct dp_ctrl_private *ctrl; - - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); - - if (ctrl->opp_table) { - dev_pm_opp_of_remove_table(ctrl->dev); - dev_pm_opp_put_clkname(ctrl->opp_table); - ctrl->opp_table = NULL; - } -} diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 25e4f7512252..2363a2df9597 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -33,6 +33,5 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link, struct dp_panel *panel, struct drm_dp_aux *aux, struct dp_power *power, struct dp_catalog *catalog, struct dp_parser *parser); -void dp_ctrl_put(struct dp_ctrl *dp_ctrl); #endif /* _DP_CTRL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index cf9c64534624..1b051ee93d7a 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -747,7 +747,6 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) static void dp_display_deinit_sub_modules(struct dp_display_private *dp) { dp_debug_put(dp->debug); - dp_ctrl_put(dp->ctrl); dp_panel_put(dp->panel); dp_aux_put(dp->aux); dp_audio_put(dp->audio); @@ -841,13 +840,11 @@ static int dp_init_sub_modules(struct dp_display_private *dp) rc = PTR_ERR(dp->audio); pr_err("failed to initialize audio, rc = %d\n", rc); dp->audio = NULL; - goto error_audio; + goto error_ctrl; } return rc; -error_audio: - dp_ctrl_put(dp->ctrl); error_ctrl: dp_panel_put(dp->panel); error_link: diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index dfaff19f4ba0..ed504fe5074f 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -114,8 +114,6 @@ struct msm_dsi_host { struct clk *pixel_clk_src; struct clk *byte_intf_clk; - struct opp_table *opp_table; - u32 byte_clk_rate; u32 pixel_clk_rate; u32 esc_clk_rate; @@ -1885,14 +1883,13 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) goto fail; } - msm_host->opp_table = dev_pm_opp_set_clkname(&pdev->dev, "byte"); - if (IS_ERR(msm_host->opp_table)) - return PTR_ERR(msm_host->opp_table); + ret = devm_pm_opp_set_clkname(&pdev->dev, "byte"); + if (ret) + return ret; /* OPP table is optional */ - ret = dev_pm_opp_of_add_table(&pdev->dev); + ret = devm_pm_opp_of_add_table(&pdev->dev); if (ret && ret != -ENODEV) { dev_err(&pdev->dev, "invalid OPP table in device tree\n"); - dev_pm_opp_put_clkname(msm_host->opp_table); return ret; } @@ -1931,8 +1928,6 @@ void msm_dsi_host_destroy(struct mipi_dsi_host *host) mutex_destroy(&msm_host->cmd_mutex); mutex_destroy(&msm_host->dev_mutex); - dev_pm_opp_of_remove_table(&msm_host->pdev->dev); - dev_pm_opp_put_clkname(msm_host->opp_table); pm_runtime_disable(&msm_host->pdev->dev); } -- cgit v1.2.3-70-g09d2 From bce98bf7f6cea9c192c139bf97d1815b5d382785 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 30 Apr 2021 12:31:00 -0700 Subject: drm/msm: Use VERB() for extra verbose logging These messages are useful for bringup/early development but in production they don't provide much value. We know what sort of GPU we have and interrupt information can be gathered other ways. This cuts down on lines in the drm debug logs that happen too often, making the debug logs practically useless. Cc: Dmitry Baryshkov Cc: Abhinav Kumar Cc: Kuogee Hsieh Cc: aravindh@codeaurora.org Cc: Sean Paul Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210430193104.1770538-3-swboyd@chromium.org [resolve merge conflicts with dpu irq refactor] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c1b02f790804..43fe01e75de4 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -408,7 +408,7 @@ int adreno_hw_init(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret, i; - DBG("%s", gpu->name); + VERB("%s", gpu->name); ret = adreno_load_fw(adreno_gpu); if (ret) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c index 4f110c428b60..19a3a3e9387b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c @@ -24,7 +24,7 @@ static void dpu_core_irq_callback_handler(void *arg, int irq_idx) struct dpu_irq_callback *cb; unsigned long irq_flags; - pr_debug("irq_idx=%d\n", irq_idx); + VERB("irq_idx=%d\n", irq_idx); if (list_empty(&irq_obj->irq_cb_tbl[irq_idx])) DRM_ERROR("no registered cb, idx:%d\n", irq_idx); @@ -80,7 +80,7 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx, return -EINVAL; } - DPU_DEBUG("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); + VERB("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); spin_lock_irqsave(&dpu_kms->irq_obj.cb_lock, irq_flags); trace_dpu_core_irq_register_callback(irq_idx, register_irq_cb); @@ -125,7 +125,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, return -EINVAL; } - DPU_DEBUG("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); + VERB("[%pS] irq_idx=%d\n", __builtin_return_address(0), irq_idx); spin_lock_irqsave(&dpu_kms->irq_obj.cb_lock, irq_flags); trace_dpu_core_irq_unregister_callback(irq_idx, register_irq_cb); @@ -138,7 +138,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, if (ret) DPU_ERROR("Fail to disable IRQ for irq_idx:%d\n", irq_idx); - DPU_DEBUG("irq_idx=%d ret=%d\n", irq_idx, ret); + VERB("irq_idx=%d ret=%d\n", irq_idx, ret); } spin_unlock_irqrestore(&dpu_kms->irq_obj.cb_lock, irq_flags); -- cgit v1.2.3-70-g09d2 From bda1d6e56038698b1cf856ecef5ab4cc569d9079 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 8 Jun 2021 13:27:44 -0400 Subject: drm/msm: remove unused icc_path/ocmem_icc_path These aren't used by anything anymore. Signed-off-by: Jonathan Marek Reviewed-by: Akhil P Oommen Link: https://lore.kernel.org/r/20210608172808.11803-2-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 --- drivers/gpu/drm/msm/msm_gpu.h | 9 --------- 2 files changed, 12 deletions(-) (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 43fe01e75de4..3a84a7b312ff 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -946,7 +946,4 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) pm_runtime_disable(&priv->gpu_pdev->dev); msm_gpu_cleanup(&adreno_gpu->base); - - icc_put(gpu->icc_path); - icc_put(gpu->ocmem_icc_path); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 18baf935e143..c302ab7ffb06 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -118,15 +118,6 @@ struct msm_gpu { struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk; uint32_t fast_rate; - /* The gfx-mem interconnect path that's used by all GPU types. */ - struct icc_path *icc_path; - - /* - * Second interconnect path for some A3xx and all A4xx GPUs to the - * On Chip MEMory (OCMEM). - */ - struct icc_path *ocmem_icc_path; - /* Hang and Inactivity Detection: */ #define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */ -- cgit v1.2.3-70-g09d2 From f6d62d091cfd1c307a1bb83ef46d334d9ac27751 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 8 Jun 2021 13:27:48 -0400 Subject: drm/msm/a6xx: add support for Adreno 660 GPU Add adreno_is_{a660,a650_family} helpers and convert update existing adreno_is_a650 usage based on downstream driver's logic (changing into adreno_is_a650_family or adding adreno_is_a660). And add the remaining changes required for A660, again based on the downstream driver: missing GMU allocations, additional register init, dummy hfi BW table, cp protect list, entry in gpulist table, hwcg table, updated a6xx_ucode_check_version check. Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20210608172808.11803-6-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 32 ++++-- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 162 ++++++++++++++++++++++++----- drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 33 ++++++ drivers/gpu/drm/msm/adreno/adreno_device.c | 13 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 13 ++- 6 files changed, 219 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c') diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index b64e62323792..b349692219b7 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -519,7 +519,7 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) if (!pdcptr) goto err; - if (adreno_is_a650(adreno_gpu)) + if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) pdc_in_aop = true; else if (adreno_is_a618(adreno_gpu) || adreno_is_a640(adreno_gpu)) pdc_address_offset = 0x30090; @@ -549,7 +549,7 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514); /* Load RSC sequencer uCode for sleep and wakeup */ - if (adreno_is_a650(adreno_gpu)) { + if (adreno_is_a650_family(adreno_gpu)) { gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0, 0xeaaae5a0); gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xe1a1ebab); gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e0a581); @@ -597,7 +597,7 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 4, 0x10108); pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 4, 0x30000); - if (adreno_is_a618(adreno_gpu) || adreno_is_a650(adreno_gpu)) + if (adreno_is_a618(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x2); else pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3); @@ -698,7 +698,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) u32 itcm_base = 0x00000000; u32 dtcm_base = 0x00040000; - if (adreno_is_a650(adreno_gpu)) + if (adreno_is_a650_family(adreno_gpu)) dtcm_base = 0x10004000; if (gmu->legacy) { @@ -751,7 +751,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) int ret; u32 chipid; - if (adreno_is_a650(adreno_gpu)) { + if (adreno_is_a650_family(adreno_gpu)) { gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 1); gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 1); } @@ -1496,12 +1496,28 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) if (ret) goto err_put_device; + + /* A660 now requires handling "prealloc requests" in GMU firmware + * For now just hardcode allocations based on the known firmware. + * note: there is no indication that these correspond to "dummy" or + * "debug" regions, but this "guess" allows reusing these BOs which + * are otherwise unused by a660. + */ + gmu->dummy.size = SZ_4K; + if (adreno_is_a660(adreno_gpu)) { + ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7, 0x60400000); + if (ret) + goto err_memory; + + gmu->dummy.size = SZ_8K; + } + /* Allocate memory for the GMU dummy page */ - ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, SZ_4K, 0x60000000); + ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size, 0x60000000); if (ret) goto err_memory; - if (adreno_is_a650(adreno_gpu)) { + if (adreno_is_a650_family(adreno_gpu)) { ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache, SZ_16M - SZ_16K, 0x04000); if (ret) @@ -1543,7 +1559,7 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) goto err_memory; } - if (adreno_is_a650(adreno_gpu)) { + if (adreno_is_a650_family(adreno_gpu)) { gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc"); if (IS_ERR(gmu->rscc)) goto err_mmio; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 16b3bd9ad44c..eacc6ef8c642 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -427,6 +427,59 @@ const struct adreno_reglist a650_hwcg[] = { {}, }; +const struct adreno_reglist a660_hwcg[] = { + {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {REG_A6XX_RBBM_ISDB_CNT, 0x00000182}, + {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, + {}, +}; + static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -541,6 +594,51 @@ static const u32 a650_protect[] = { A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */ }; +/* These are for a635 and a660 */ +static const u32 a660_protect[] = { + A6XX_PROTECT_RDONLY(0x00000, 0x04ff), + A6XX_PROTECT_RDONLY(0x00501, 0x0005), + A6XX_PROTECT_RDONLY(0x0050b, 0x02f4), + A6XX_PROTECT_NORDWR(0x0050e, 0x0000), + A6XX_PROTECT_NORDWR(0x00510, 0x0000), + A6XX_PROTECT_NORDWR(0x00534, 0x0000), + A6XX_PROTECT_NORDWR(0x00800, 0x0082), + A6XX_PROTECT_NORDWR(0x008a0, 0x0008), + A6XX_PROTECT_NORDWR(0x008ab, 0x0024), + A6XX_PROTECT_RDONLY(0x008de, 0x00ae), + A6XX_PROTECT_NORDWR(0x00900, 0x004d), + A6XX_PROTECT_NORDWR(0x0098d, 0x0272), + A6XX_PROTECT_NORDWR(0x00e00, 0x0001), + A6XX_PROTECT_NORDWR(0x00e03, 0x000c), + A6XX_PROTECT_NORDWR(0x03c00, 0x00c3), + A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff), + A6XX_PROTECT_NORDWR(0x08630, 0x01cf), + A6XX_PROTECT_NORDWR(0x08e00, 0x0000), + A6XX_PROTECT_NORDWR(0x08e08, 0x0000), + A6XX_PROTECT_NORDWR(0x08e50, 0x001f), + A6XX_PROTECT_NORDWR(0x08e80, 0x027f), + A6XX_PROTECT_NORDWR(0x09624, 0x01db), + A6XX_PROTECT_NORDWR(0x09e60, 0x0011), + A6XX_PROTECT_NORDWR(0x09e78, 0x0187), + A6XX_PROTECT_NORDWR(0x0a630, 0x01cf), + A6XX_PROTECT_NORDWR(0x0ae02, 0x0000), + A6XX_PROTECT_NORDWR(0x0ae50, 0x012f), + A6XX_PROTECT_NORDWR(0x0b604, 0x0000), + A6XX_PROTECT_NORDWR(0x0b608, 0x0006), + A6XX_PROTECT_NORDWR(0x0be02, 0x0001), + A6XX_PROTECT_NORDWR(0x0be20, 0x015f), + A6XX_PROTECT_NORDWR(0x0d000, 0x05ff), + A6XX_PROTECT_NORDWR(0x0f000, 0x0bff), + A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff), + A6XX_PROTECT_NORDWR(0x18400, 0x1fff), + A6XX_PROTECT_NORDWR(0x1a400, 0x1fff), + A6XX_PROTECT_NORDWR(0x1f400, 0x0443), + A6XX_PROTECT_RDONLY(0x1f844, 0x007b), + A6XX_PROTECT_NORDWR(0x1f860, 0x0000), + A6XX_PROTECT_NORDWR(0x1f887, 0x001b), + A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */ +}; + static void a6xx_set_cp_protect(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -554,6 +652,10 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) regs = a650_protect; count = ARRAY_SIZE(a650_protect); count_max = 48; + } else if (adreno_is_a660(adreno_gpu)) { + regs = a660_protect; + count = ARRAY_SIZE(a660_protect); + count_max = 48; } /* @@ -584,7 +686,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) if (adreno_is_a640(adreno_gpu)) amsbc = 1; - if (adreno_is_a650(adreno_gpu)) { + if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) { /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ lower_bit = 3; amsbc = 1; @@ -648,6 +750,11 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, * Targets up to a640 (a618, a630 and a640) need to check for a * microcode version that is patched to support the whereami opcode or * one that is new enough to include it by default. + * + * a650 tier targets don't need whereami but still need to be + * equal to or newer than 0.95 for other security fixes + * + * a660 targets have all the critical security fixes from the start */ if (adreno_is_a618(adreno_gpu) || adreno_is_a630(adreno_gpu) || adreno_is_a640(adreno_gpu)) { @@ -671,27 +778,20 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, DRM_DEV_ERROR(&gpu->pdev->dev, "a630 SQE ucode is too old. Have version %x need at least %x\n", buf[0] & 0xfff, 0x190); - } else { - /* - * a650 tier targets don't need whereami but still need to be - * equal to or newer than 0.95 for other security fixes - */ - if (adreno_is_a650(adreno_gpu)) { - if ((buf[0] & 0xfff) >= 0x095) { - ret = true; - goto out; - } - - DRM_DEV_ERROR(&gpu->pdev->dev, - "a650 SQE ucode is too old. Have version %x need at least %x\n", - buf[0] & 0xfff, 0x095); + } else if (adreno_is_a650(adreno_gpu)) { + if ((buf[0] & 0xfff) >= 0x095) { + ret = true; + goto out; } - /* - * When a660 is added those targets should return true here - * since those have all the critical security fixes built in - * from the start - */ + DRM_DEV_ERROR(&gpu->pdev->dev, + "a650 SQE ucode is too old. Have version %x need at least %x\n", + buf[0] & 0xfff, 0x095); + } else if (adreno_is_a660(adreno_gpu)) { + ret = true; + } else { + DRM_DEV_ERROR(&gpu->pdev->dev, + "unknown GPU, add it to a6xx_ucode_check_version()!!\n"); } out: msm_gem_put_vaddr(obj); @@ -797,7 +897,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) a6xx_set_hwcg(gpu, true); /* VBIF/GBIF start*/ - if (adreno_is_a640(adreno_gpu) || adreno_is_a650(adreno_gpu)) { + if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); @@ -822,7 +922,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000); gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); - if (!adreno_is_a650(adreno_gpu)) { + if (!adreno_is_a650_family(adreno_gpu)) { /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000); @@ -835,19 +935,22 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); - if (adreno_is_a640(adreno_gpu) || adreno_is_a650(adreno_gpu)) + if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); else gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); + if (adreno_is_a660(adreno_gpu)) + gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); + /* Setting the mem pool size */ gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); /* Setting the primFifo thresholds default values, * and vccCacheSkipDis=1 bit (0x200) for A640 and newer */ - if (adreno_is_a650(adreno_gpu)) + if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); else if (adreno_is_a640(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200); @@ -872,7 +975,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1); /* Set weights for bicubic filtering */ - if (adreno_is_a650(adreno_gpu)) { + if (adreno_is_a650_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, 0x3fe05ff4); @@ -887,6 +990,13 @@ static int a6xx_hw_init(struct msm_gpu *gpu) /* Protect registers from the CP */ a6xx_set_cp_protect(gpu); + if (adreno_is_a660(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); + gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); + /* Set dualQ + disable afull for A660 GPU but not for A635 */ + gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); + } + /* Enable expanded apriv for targets that support it */ if (gpu->hw_apriv) { gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, @@ -1558,7 +1668,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) */ info = adreno_info(config->rev); - if (info && info->revn == 650) + if (info && (info->revn == 650 || info->revn == 660)) adreno_gpu->base.hw_apriv = true; a6xx_llc_slices_init(pdev, a6xx_gpu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index ccd44d0418f8..919433732b43 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -351,6 +351,37 @@ static void a650_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) msg->cnoc_cmds_data[1][0] = 0x60000001; } +static void a660_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) +{ + /* + * Send a single "off" entry just to get things running + * TODO: bus scaling + */ + msg->bw_level_num = 1; + + msg->ddr_cmds_num = 3; + msg->ddr_wait_bitmask = 0x01; + + msg->ddr_cmds_addrs[0] = 0x50004; + msg->ddr_cmds_addrs[1] = 0x500a0; + msg->ddr_cmds_addrs[2] = 0x50000; + + msg->ddr_cmds_data[0][0] = 0x40000000; + msg->ddr_cmds_data[0][1] = 0x40000000; + msg->ddr_cmds_data[0][2] = 0x40000000; + + /* + * These are the CX (CNOC) votes - these are used by the GMU but the + * votes are known and fixed for the target + */ + msg->cnoc_cmds_num = 1; + msg->cnoc_wait_bitmask = 0x01; + + msg->cnoc_cmds_addrs[0] = 0x50070; + msg->cnoc_cmds_data[0][0] = 0x40000000; + msg->cnoc_cmds_data[1][0] = 0x60000001; +} + static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { /* Send a single "off" entry since the 630 GMU doesn't do bus scaling */ @@ -401,6 +432,8 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) a640_build_bw_table(&msg); else if (adreno_is_a650(adreno_gpu)) a650_build_bw_table(&msg); + else if (adreno_is_a660(adreno_gpu)) + a660_build_bw_table(&msg); else a6xx_build_bw_table(&msg); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index b3337b93be91..6dad8015c9a1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -287,6 +287,19 @@ static const struct adreno_info gpulist[] = { .init = a6xx_gpu_init, .zapfw = "a650_zap.mdt", .hwcg = a650_hwcg, + }, { + .rev = ADRENO_REV(6, 6, 0, ANY_ID), + .revn = 660, + .name = "A660", + .fw = { + [ADRENO_FW_SQE] = "a660_sqe.fw", + [ADRENO_FW_GMU] = "a660_gmu.bin", + }, + .gmem = SZ_1M + SZ_512K, + .inactive_period = DRM_MSM_INACTIVE_PERIOD, + .init = a6xx_gpu_init, + .zapfw = "a660_zap.mdt", + .hwcg = a660_hwcg, }, }; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 3a84a7b312ff..90bc8637ee52 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -239,7 +239,7 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) *value = adreno_gpu->gmem; return 0; case MSM_PARAM_GMEM_BASE: - *value = !adreno_is_a650(adreno_gpu) ? 0x100000 : 0; + *value = !adreno_is_a650_family(adreno_gpu) ? 0x100000 : 0; return 0; case MSM_PARAM_CHIP_ID: *value = adreno_gpu->rev.patchid | diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index ccac275aa7a2..8dbe0d157520 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -55,7 +55,7 @@ struct adreno_reglist { u32 value; }; -extern const struct adreno_reglist a630_hwcg[], a640_hwcg[], a650_hwcg[]; +extern const struct adreno_reglist a630_hwcg[], a640_hwcg[], a650_hwcg[], a660_hwcg[]; struct adreno_info { struct adreno_rev rev; @@ -247,6 +247,17 @@ static inline int adreno_is_a650(struct adreno_gpu *gpu) return gpu->revn == 650; } +static inline int adreno_is_a660(struct adreno_gpu *gpu) +{ + return gpu->revn == 660; +} + +/* check for a650, a660, or any derivatives */ +static inline int adreno_is_a650_family(struct adreno_gpu *gpu) +{ + return gpu->revn == 650 || gpu->revn == 620 || gpu->revn == 660; +} + int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname); -- cgit v1.2.3-70-g09d2 From e25e92e08e32c6bf63a968929d232f13dcf9938c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Jun 2021 14:44:13 -0700 Subject: drm/msm: devcoredump iommu fault support Wire up support to stall the SMMU on iova fault, and collect a devcore- dump snapshot for easier debugging of faults. Currently this is a6xx-only, but mostly only because so far it is the only one using adreno-smmu-priv. Signed-off-by: Rob Clark Acked-by: Jordan Crouse Link: https://lore.kernel.org/r/20210610214431.539029-6-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 19 ++++++++++-- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 38 +++++++++++++++++++++-- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 42 ++++++++++++++++++++----- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 15 +++++++++ drivers/gpu/drm/msm/msm_gem.h | 1 + drivers/gpu/drm/msm/msm_gem_submit.c | 1 + drivers/gpu/drm/msm/msm_gpu.c | 48 +++++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_gpu.h | 17 ++++++++++ drivers/gpu/drm/msm/msm_gpummu.c | 5 +++ drivers/gpu/drm/msm/msm_iommu.c | 11 +++++++ drivers/gpu/drm/msm/msm_mmu.h | 1 + 11 files changed, 186 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/msm/adreno/adreno_gpu.c') diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index eb030b00bff4..7a271de9a212 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1200,6 +1200,15 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) struct drm_device *dev = gpu->dev; struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + /* + * If stalled on SMMU fault, we could trip the GPU's hang detection, + * but the fault handler will trigger the devcore dump, and we want + * to otherwise resume normally rather than killing the submit, so + * just bail. + */ + if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24)) + return; + DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", ring ? ring->id : -1, ring ? ring->seqno : 0, gpu_read(gpu, REG_A5XX_RBBM_STATUS), @@ -1523,6 +1532,7 @@ static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) { struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state), GFP_KERNEL); + bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24)); if (!a5xx_state) return ERR_PTR(-ENOMEM); @@ -1535,8 +1545,13 @@ static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS); - /* Get the HLSQ regs with the help of the crashdumper */ - a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state); + /* + * Get the HLSQ regs with the help of the crashdumper, but only if + * we are not stalled in an iommu fault (in which case the crashdumper + * would not have access to memory) + */ + if (!stalled) + a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state); a5xx_set_hwcg(gpu, true); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 0d39e8fdb16f..9c5e4618aa0a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1193,6 +1193,16 @@ static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *da struct msm_gpu *gpu = arg; struct adreno_smmu_fault_info *info = data; const char *type = "UNKNOWN"; + const char *block; + bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); + + /* + * If we aren't going to be resuming later from fault_worker, then do + * it now. + */ + if (!do_devcoredump) { + gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); + } /* * Print a default message if we couldn't get the data from the @@ -1216,15 +1226,30 @@ static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *da else if (info->fsr & ARM_SMMU_FSR_EF) type = "EXTERNAL"; + block = a6xx_fault_block(gpu, info->fsynr1 & 0xff); + pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n", info->ttbr0, iova, - flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", type, - a6xx_fault_block(gpu, info->fsynr1 & 0xff), + flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", + type, block, gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)), gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)), gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)), gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7))); + if (do_devcoredump) { + /* Turn off the hangcheck timer to keep it from bothering us */ + del_timer(&gpu->hangcheck_timer); + + gpu->fault_info.ttbr0 = info->ttbr0; + gpu->fault_info.iova = iova; + gpu->fault_info.flags = flags; + gpu->fault_info.type = type; + gpu->fault_info.block = block; + + kthread_queue_work(gpu->worker, &gpu->fault_work); + } + return 0; } @@ -1276,6 +1301,15 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + /* + * If stalled on SMMU fault, we could trip the GPU's hang detection, + * but the fault handler will trigger the devcore dump, and we want + * to otherwise resume normally rather than killing the submit, so + * just bail. + */ + if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) + return; + /* * Force the GPU to stay on until after we finish * collecting information diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 21c49c5b4519..ad4ea0ed5d99 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -832,6 +832,20 @@ static void a6xx_get_registers(struct msm_gpu *gpu, a6xx_get_ahb_gpu_registers(gpu, a6xx_state, &a6xx_vbif_reglist, &a6xx_state->registers[index++]); + if (!dumper) { + /* + * We can't use the crashdumper when the SMMU is stalled, + * because the GPU has no memory access until we resume + * translation (but we don't want to do that until after + * we have captured as much useful GPU state as possible). + * So instead collect registers via the CPU: + */ + for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) + a6xx_get_ahb_gpu_registers(gpu, + a6xx_state, &a6xx_reglist[i], + &a6xx_state->registers[index++]); + return; + } for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) a6xx_get_crashdumper_registers(gpu, @@ -905,11 +919,13 @@ static void a6xx_get_indexed_registers(struct msm_gpu *gpu, struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) { - struct a6xx_crashdumper dumper = { 0 }; + struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), GFP_KERNEL); + bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & + A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); if (!a6xx_state) return ERR_PTR(-ENOMEM); @@ -928,14 +944,24 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) /* Get the banks of indexed registers */ a6xx_get_indexed_registers(gpu, a6xx_state); - /* Try to initialize the crashdumper */ - if (!a6xx_crashdumper_init(gpu, &dumper)) { - a6xx_get_registers(gpu, a6xx_state, &dumper); - a6xx_get_shaders(gpu, a6xx_state, &dumper); - a6xx_get_clusters(gpu, a6xx_state, &dumper); - a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper); + /* + * Try to initialize the crashdumper, if we are not dumping state + * with the SMMU stalled. The crashdumper needs memory access to + * write out GPU state, so we need to skip this when the SMMU is + * stalled in response to an iova fault + */ + if (!stalled && !a6xx_crashdumper_init(gpu, &_dumper)) { + dumper = &_dumper; + } + + a6xx_get_registers(gpu, a6xx_state, dumper); + + if (dumper) { + a6xx_get_shaders(gpu, a6xx_state, dumper); + a6xx_get_clusters(gpu, a6xx_state, dumper); + a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); - msm_gem_kernel_put(dumper.bo, gpu->aspace, true); + msm_gem_kernel_put(dumper->bo, gpu->aspace, true); } if (snapshot_debugbus) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 90bc8637ee52..9f5a30234b33 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -684,6 +684,21 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); + /* + * If this is state collected due to iova fault, so fault related info + * + * TTBR0 would not be zero, so this is a good way to distinguish + */ + if (state->fault_info.ttbr0) { + const struct msm_gpu_fault_info *info = &state->fault_info; + + drm_puts(p, "fault-info:\n"); + drm_printf(p, " - ttbr0=%.16llx\n", info->ttbr0); + drm_printf(p, " - iova=%.16lx\n", info->iova); + drm_printf(p, " - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ"); + drm_printf(p, " - type=%s\n", info->type); + drm_printf(p, " - source=%s\n", info->block); + } drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status); diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 03e2cc2a2ce1..405f8411e395 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -328,6 +328,7 @@ struct msm_gem_submit { struct dma_fence *fence; struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ + bool fault_dumped; /* Limit devcoredump dumping to one per submit */ bool valid; /* true if no cmdstream patching needed */ bool in_rb; /* "sudo" mode, copy cmds into RB */ struct msm_ringbuffer *ring; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5480852bdeda..44f84bfd0c0e 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -50,6 +50,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->cmd = (void *)&submit->bos[nr_bos]; submit->queue = queue; submit->ring = gpu->rb[queue->prio]; + submit->fault_dumped = false; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 4ee8ba09b762..0ebf7bc6ad09 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -387,6 +387,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Fill in the additional crash state information */ state->comm = kstrdup(comm, GFP_KERNEL); state->cmd = kstrdup(cmd, GFP_KERNEL); + state->fault_info = gpu->fault_info; if (submit) { int i, nr = 0; @@ -559,6 +560,52 @@ static void recover_worker(struct kthread_work *work) msm_gpu_retire(gpu); } +static void fault_worker(struct kthread_work *work) +{ + struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work); + struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit; + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + char *comm = NULL, *cmd = NULL; + + mutex_lock(&dev->struct_mutex); + + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); + if (submit && submit->fault_dumped) + goto resume_smmu; + + if (submit) { + struct task_struct *task; + + task = get_pid_task(submit->pid, PIDTYPE_PID); + if (task) { + comm = kstrdup(task->comm, GFP_KERNEL); + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); + put_task_struct(task); + } + + /* + * When we get GPU iova faults, we can get 1000s of them, + * but we really only want to log the first one. + */ + submit->fault_dumped = true; + } + + /* Record the crash state */ + pm_runtime_get_sync(&gpu->pdev->dev); + msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + pm_runtime_put_sync(&gpu->pdev->dev); + + kfree(cmd); + kfree(comm); + +resume_smmu: + memset(&gpu->fault_info, 0, sizeof(gpu->fault_info)); + gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); + + mutex_unlock(&dev->struct_mutex); +} + static void hangcheck_timer_reset(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; @@ -923,6 +970,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, INIT_LIST_HEAD(&gpu->active_list); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); + kthread_init_work(&gpu->fault_work, fault_worker); timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index fdc5851355af..ef41ec09f59c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -71,6 +71,15 @@ struct msm_gpu_funcs { uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); }; +/* Additional state for iommu faults: */ +struct msm_gpu_fault_info { + u64 ttbr0; + unsigned long iova; + int flags; + const char *type; + const char *block; +}; + struct msm_gpu { const char *name; struct drm_device *dev; @@ -125,6 +134,12 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */ struct timer_list hangcheck_timer; + /* Fault info for most recent iova fault: */ + struct msm_gpu_fault_info fault_info; + + /* work for handling GPU ioval faults: */ + struct kthread_work fault_work; + /* work for handling GPU recovery: */ struct kthread_work recover_work; @@ -232,6 +247,8 @@ struct msm_gpu_state { char *comm; char *cmd; + struct msm_gpu_fault_info fault_info; + int nr_bos; struct msm_gpu_state_bo *bos; }; diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 379496186c7f..f7d1945e0c9f 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -68,6 +68,10 @@ static int msm_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len) return 0; } +static void msm_gpummu_resume_translation(struct msm_mmu *mmu) +{ +} + static void msm_gpummu_destroy(struct msm_mmu *mmu) { struct msm_gpummu *gpummu = to_msm_gpummu(mmu); @@ -83,6 +87,7 @@ static const struct msm_mmu_funcs funcs = { .map = msm_gpummu_map, .unmap = msm_gpummu_unmap, .destroy = msm_gpummu_destroy, + .resume_translation = msm_gpummu_resume_translation, }; struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 6975b95c3c29..eed2a762e9dd 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -184,6 +184,9 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) * the arm-smmu driver as a trigger to set up TTBR0 */ if (atomic_inc_return(&iommu->pagetables) == 1) { + /* Enable stall on iommu fault: */ + adreno_smmu->set_stall(adreno_smmu->cookie, true); + ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg); if (ret) { free_io_pgtable_ops(pagetable->pgtbl_ops); @@ -226,6 +229,13 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, return 0; } +static void msm_iommu_resume_translation(struct msm_mmu *mmu) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev); + + adreno_smmu->resume_translation(adreno_smmu->cookie, true); +} + static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); @@ -273,6 +283,7 @@ static const struct msm_mmu_funcs funcs = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .destroy = msm_iommu_destroy, + .resume_translation = msm_iommu_resume_translation, }; struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index a88f44c3268d..de158e1bf765 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -15,6 +15,7 @@ struct msm_mmu_funcs { size_t len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len); void (*destroy)(struct msm_mmu *mmu); + void (*resume_translation)(struct msm_mmu *mmu); }; enum msm_mmu_type { -- cgit v1.2.3-70-g09d2