diff options
Diffstat (limited to 'drivers/gpu')
246 files changed, 2464 insertions, 1356 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2520db0b776e..c7edba18a6f0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -199,7 +199,7 @@ config DRM_TTM config DRM_TTM_KUNIT_TEST tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS default n - depends on DRM && KUNIT && MMU + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) select DRM_TTM select DRM_EXPORT_FOR_TESTS if m select DRM_KUNIT_TEST_HELPERS @@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST help Enables unit tests for TTM, a GPU memory manager subsystem used to manage memory buffers. This option is mostly useful for kernel - developers. + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. If in doubt, say "N". diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3d8a48f46b01..79827a6dcd7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -200,6 +200,7 @@ extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; +extern int amdgpu_damage_clips; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; @@ -1078,6 +1079,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; @@ -1547,9 +1550,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); +void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } +static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2deebece810e..7099ff9cf8c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1519,4 +1519,22 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } +/** + * amdgpu_choose_low_power_state + * + * @adev: amdgpu_device_pointer + * + * Choose the target low power state for the GPU + */ +void amdgpu_choose_low_power_state(struct amdgpu_device *adev) +{ + if (adev->in_runpm) + return; + + if (amdgpu_acpi_is_s0ix_active(adev)) + adev->in_s0ix = true; + else if (amdgpu_acpi_is_s3_active(adev)) + adev->in_s3 = true; +} + #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 77e263660288..41db030ddc4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) static const struct drm_client_funcs kfd_client_funcs = { .unregister = drm_client_release, }; + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) +{ + int ret; + + if (!adev->kfd.init_complete) + return 0; + + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + return ret; + } + + drm_client_register(&adev->kfd.client); + + return 0; +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; - int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); - if (ret) { - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); - return; - } - /* this is going to have a few of the MSBs set that we need to * clear */ @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - if (adev->kfd.init_complete) - drm_client_register(&adev->kfd.client); - else - drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f262b9d89541..27c61c535e29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif @@ -301,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 899e31e3a5e8..3a3f3ce09f00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (!(ring && drm_sched_wqueue_ready(&ring->sched))) + if (!amdgpu_ring_sched_ready(ring)) continue; /* stop secheduler and drain ring. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f183d7faeeec..231fd927dcfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2085,21 +2085,35 @@ out: return ret; } -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) { struct kfd_mem_attachment *entry; struct amdgpu_vm *vm; + int ret; vm = drm_priv_to_vm(drm_priv); mutex_lock(&mem->lock); + ret = amdgpu_bo_reserve(mem->bo, true); + if (ret) + goto out; + list_for_each_entry(entry, &mem->attachments, list) { - if (entry->bo_va->base.vm == vm) - kfd_mem_dmaunmap_attachment(mem, entry); + if (entry->bo_va->base.vm != vm) + continue; + if (entry->bo_va->base.bo->tbo.ttm && + !entry->bo_va->base.bo->tbo.ttm->sg) + continue; + + kfd_mem_dmaunmap_attachment(mem, entry); } + amdgpu_bo_unreserve(mem->bo); +out: mutex_unlock(&mem->lock); + + return ret; } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index e485dd3357c6..1afbb2e932c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_wqueue_stop(&ring->sched); } @@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_wqueue_start(&ring->sched); } @@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) ring = adev->rings[val]; - if (!ring || !ring->funcs->preempt_ib || - !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring) || + !ring->funcs->preempt_ib) return -EINVAL; /* the last preemption failed */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b158d27d0a71..94bdb5fa6ebc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - r = psp_gpu_reset(adev); - break; - default: - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - break; - } - + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; @@ -4524,13 +4514,15 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; + amdgpu_choose_low_power_state(adev); + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unprepare; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) @@ -4539,10 +4531,15 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - return r; + goto unprepare; } return 0; + +unprepare: + adev->in_s0ix = adev->in_s3 = false; + + return r; } /** @@ -4579,7 +4576,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); @@ -5031,7 +5027,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; spin_lock(&ring->sched.job_list_lock); @@ -5170,7 +5166,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; /* Clear job fence from fence drv to avoid force_completion @@ -5637,7 +5633,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); @@ -5706,7 +5702,7 @@ skip_hw_reset: for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); @@ -6061,7 +6057,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, NULL); @@ -6189,7 +6185,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cc69005f5b46..586f4d03039d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; +int amdgpu_damage_clips = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -860,6 +861,18 @@ MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (defau module_param_named(backlight, amdgpu_backlight, bint, 0444); /** + * DOC: damageclips (int) + * Enable or disable damage clips support. If damage clips support is disabled, + * we will force full frame updates, irrespective of what user space sends to + * us. + * + * Defaults to -1 (where it is enabled unless a PSR-SU display is detected). + */ +MODULE_PARM_DESC(damageclips, + "Damage clips support (0 = disable, 1 = enable, -1 auto (default))"); +module_param_named(damageclips, amdgpu_damage_clips, int, 0444); + +/** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written * to or read from memory. @@ -2255,6 +2268,10 @@ retry_init: if (ret) goto err_pci; + ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors @@ -2472,6 +2489,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2486,6 +2504,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 73b8cca35bab..c623e23049d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -121,6 +121,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; dma_addr_t dma_addr; struct page *p; + unsigned long x; int ret; if (adev->gart.bo != NULL) @@ -130,6 +131,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) if (!p) return -ENOMEM; + /* assign pages to this device */ + for (x = 0; x < (1UL << order); x++) + p[x].mapping = adev->mman.bdev.dev_mapping; + /* If the hardware does not support UTCL2 snooping of the CPU caches * then set_memory_wc() could be used as a workaround to mark the pages * as write combine memory. @@ -223,6 +228,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) unsigned int order = get_order(adev->gart.table_size); struct sg_table *sg = adev->gart.bo->tbo.sg; struct page *p; + unsigned long x; int ret; ret = amdgpu_bo_reserve(adev->gart.bo, false); @@ -234,6 +240,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) sg_free_table(sg); kfree(sg); p = virt_to_page(adev->gart.ptr); + for (x = 0; x < (1UL << order); x++) + p[x].mapping = NULL; __free_pages(p, order); adev->gart.ptr = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b9674c57c436..6ddc8e3360e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -723,8 +723,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 468a67b302d4..ca5c86e5f7cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size } } - if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len)) + if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; err_free_shared_buf: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 45424ebf9681..5505d646f43a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring) ring->name); ring->sched.ready = !r; + return r; } @@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring) if (ring->is_sw_ring) amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE); } + +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) +{ + if (!ring) + return false; + + if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched)) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bbb53720a018..fe1a61eb6e4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 08916538a615..8db880244324 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -221,8 +221,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; +static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_mem_info_vram_vendor.attr && + !adev->gmc.vram_vendor) + return 0; + + return attr->mode; +} + const struct attribute_group amdgpu_vram_mgr_attr_group = { - .attrs = amdgpu_vram_mgr_attributes + .attrs = amdgpu_vram_mgr_attributes, + .is_visible = amdgpu_vram_attrs_is_visible }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 6f7c031dd197..f24e34dc33d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37a..c19681492efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d63cab294883..dcdecb18b230 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4027,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = 0; adev->gfx.mec2_fw = NULL; } - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); gfx_v10_0_check_fw_write_wait(adev); out: @@ -6589,7 +6587,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0ea0866c261f..4f3bfdc75b37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -107,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) }; -static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a), - SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f) -}; - #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -304,11 +287,6 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); break; - case IP_VERSION(11, 5, 0): - soc15_program_register_sequence(adev, - golden_settings_gc_11_5_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0)); - break; default: break; } @@ -3846,7 +3824,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 69c500910746..3bc6943365a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3034,6 +3034,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 42e103d7077d..59d9215e5556 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index efc16e580f1e..45a2f8e031a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ff4ae73d27ec..4422b27a3cc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v8_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f9039d64ff2d..e67a62db9e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1947,13 +1947,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { - static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; - u32 vram_info; - - if (!amdgpu_sriov_vf(adev)) { - vram_info = RREG32(regBIF_BIOS_SCRATCH_4); - adev->gmc.vram_vendor = vram_info & 0xF; - } adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; } @@ -2340,8 +2333,8 @@ static int gmc_v9_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a..2c02ae69883d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index d9ed7332d805..ad4ad39f128f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 8fb05eae340a..b8da0fc29378 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index bc38b90f8cf8..88ea58d5c4ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle, return ret; } -static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - return 0; -} - static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = { - .set = jpeg_v4_0_set_interrupt_state, .process = jpeg_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 6ede85b28cc8..78b74daf4eeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -181,7 +181,6 @@ static int jpeg_v4_0_5_hw_fini(void *handle) RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); } - amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); return 0; } @@ -516,14 +515,6 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle, return ret; } -static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - return 0; -} - static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -603,7 +594,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { - .set = jpeg_v4_0_5_set_interrupt_state, .process = jpeg_v4_0_5_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e64b33115848..de93614726c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index e90f33780803..b4723d68eab0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -431,6 +431,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) u32 inst_mask; int i; + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET( + NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) + << 2; WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE, 0xff & ~(adev->gfx.xcc_mask)); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a5750..cada9f300a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 15033efec2ba..1c614451dead 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1302,6 +1325,10 @@ static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 48c6efcdeac9..4d7188912edf 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs; /* SOC21 */ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = { diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 917707bba7f3..450b6e831509 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 169ed400ee7b..8ab01ae919d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2018,22 +2018,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta } /** - * vcn_v4_0_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - -/** * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state * * @adev: amdgpu_device pointer @@ -2097,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ } static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = { - .set = vcn_v4_0_set_interrupt_state, .process = vcn_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 2eda30e78f61..49e4c3c09aca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle) vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - - amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); } return 0; @@ -1669,22 +1667,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s } /** - * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - -/** * vcn_v4_0_5_process_interrupt - process VCN block interrupt * * @adev: amdgpu_device pointer @@ -1726,7 +1708,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp } static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = { - .set = vcn_v4_0_5_set_interrupt_state, .process = vcn_v4_0_5_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d364c6dd152c..bf68e18e3824 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index ddfc6941f9d5..db66e6cccaf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index df75863393fc..d1caaf0e6a7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { @@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb9eef807, 0x876dff6d, 0x0000ffff, 0x87fe7e7e, 0x87ea6a6a, 0xb9faf802, - 0xbe80226c, 0xbf810000, + 0xbe80226c, 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, @@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { @@ -2065,7 +2065,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { @@ -2500,7 +2500,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x876dff6d, 0x0000ffff, 0x87fe7e7e, 0x87ea6a6a, 0xb9faf802, 0xbe80226c, - 0xbf810000, 0xbf9f0000, + 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, }; @@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { 0xb8eef802, 0xbf0d866e, 0xbfa20002, 0xb97af802, 0xbe80486c, 0xb97af802, - 0xbe804a6c, 0xbfb00000, + 0xbe804a6c, 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, @@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index e0140df0b0ec..71b3dc0c7363 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -1104,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV: s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: - s_endpgm + s_endpgm_saved end function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index e506411ad28a..bb26338204f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -921,7 +921,7 @@ L_RESTORE: /* the END */ /**************************************************************************/ L_END_PGM: - s_endpgm + s_endpgm_saved end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ce4c52ec34d8..80e90fdef291 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ - amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); + err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); + if (err) + goto sync_memory_failed; } mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index f856901055d3..bdc01ca9609a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -574,7 +574,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - addr = prange->start << PAGE_SHIFT; + addr = migrate->start; src = (uint64_t *)(scratch + npages); dst = scratch; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8b7fed913526..22cbfa1bdadd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -170,6 +170,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 15277f1d5cf0..826bc4f6c8a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); if (has_wa_flag) { - uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ? - 0xffff : 0xffffffff; + uint32_t wa_mask = + (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff; m->compute_static_thread_mgmt_se0 = wa_mask; m->compute_static_thread_mgmt_se1 = wa_mask; @@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 42d881809dc7..697b6d530d12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); + if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) { + if (minfo->update_flag & UPDATE_FLAG_IS_GWS) + m->compute_resource_limits |= + COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK; + else + m->compute_resource_limits &= + ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK; + } + q->is_active = QUEUE_IS_ACTIVE(*q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 17fbedbf3651..80320b8603fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -532,6 +532,7 @@ struct queue_properties { enum mqd_update_flag { UPDATE_FLAG_DBG_WA_ENABLE = 1, UPDATE_FLAG_DBG_WA_DISABLE = 2, + UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */ }; struct mqd_update_info { @@ -1488,10 +1489,15 @@ void kfd_dec_compute_active(struct kfd_node *dev); /* Cgroup Support */ /* Check with device cgroup if @kfd device is accessible */ -static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd) +static inline int kfd_devcgroup_check_permission(struct kfd_node *node) { #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) - struct drm_device *ddev = adev_to_drm(kfd->adev); + struct drm_device *ddev; + + if (node->xcp) + ddev = node->xcp->ddev; + else + ddev = adev_to_drm(node->adev); return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 43eff221eae5..4858112f9a53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws) { + struct mqd_update_info minfo = {0}; struct kfd_node *dev = NULL; struct process_queue_node *pqn; struct kfd_process_device *pdd; @@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; + minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, - pqn->q, NULL); + pqn->q, &minfo); } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e5f7c92eebcb..6ed2ec381aaa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1638,12 +1638,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, else mode = UNKNOWN_MEMORY_PARTITION_MODE; - if (pcache->cache_level == 2) - pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc; - else if (mode) - pcache->cache_size = pcache_info[cache_type].cache_size / mode; - else - pcache->cache_size = pcache_info[cache_type].cache_size; + pcache->cache_size = pcache_info[cache_type].cache_size; + /* Partition mode only affects L3 cache size */ + if (mode && pcache->cache_level == 3) + pcache->cache_size /= mode; if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) pcache->cache_type |= HSA_CACHE_TYPE_DATA; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d4f525b66a09..1a9bbb04bd5e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -272,6 +272,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, { u32 v_blank_start, v_blank_end, h_position, v_position; struct amdgpu_crtc *acrtc = NULL; + struct dc *dc = adev->dm.dc; if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) return -EINVAL; @@ -284,6 +285,9 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, return 0; } + if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dc, false); + /* * TODO rework base driver to use values directly. * for now parse it back into reg-format @@ -1715,7 +1719,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0]; init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; - init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + if (amdgpu_dc_debug_mask & DC_DISABLE_IPS) + init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + + init_data.flags.disable_ips_in_vpb = 1; /* Enable DWB for tested platforms only */ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) @@ -1836,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub aux callback"); goto error; } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - } - - /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. - * It is expected that DMUB will resend any pending notifications at this point, for - * example HPD from DPIA. - */ - if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point. Note + * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to + * align legacy interface initialization sequence. Connection status will be proactivly + * detected once in the amdgpu_dm_initialize_drm_device. + */ dc_enable_dmub_outbox(adev->dm.dc); /* DPIA trace goes to dmesg logs only if outbox is enabled */ @@ -1949,7 +1947,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) &adev->dm.dmub_bo_gpu_addr, &adev->dm.dmub_bo_cpu_addr); - if (adev->dm.hpd_rx_offload_wq) { + if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) { for (i = 0; i < adev->dm.dc->caps.max_links; i++) { if (adev->dm.hpd_rx_offload_wq[i].wq) { destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq); @@ -2280,6 +2278,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } @@ -3529,6 +3528,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev) int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT; int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT; + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + } + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { @@ -3557,10 +3564,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev) handle_hpd_rx_irq, (void *) aconnector); } - - if (adev->dm.hpd_rx_offload_wq) - adev->dm.hpd_rx_offload_wq[connector->index].aconnector = - aconnector; } } @@ -4554,6 +4557,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; } + if (dm->hpd_rx_offload_wq) + dm->hpd_rx_offload_wq[aconnector->base.index].aconnector = + aconnector; + if (!dc_link_detect_connection_type(link, &new_connection_type)) DRM_ERROR("KMS: Failed to detect connector\n"); @@ -5212,6 +5219,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, struct drm_plane_state *new_plane_state, struct drm_crtc_state *crtc_state, struct dc_flip_addrs *flip_addrs, + bool is_psr_su, bool *dirty_regions_changed) { struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); @@ -5236,6 +5244,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); + if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 && + is_psr_su))) + goto ffu; + if (!dm_crtc_state->mpo_requested) { if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS) goto ffu; @@ -6187,7 +6199,9 @@ create_stream_for_sink(struct drm_connector *connector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); @@ -6520,10 +6534,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector) static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct dc_link *dc_link = aconnector->dc_link; struct dc_sink *dc_em_sink = aconnector->dc_em_sink; struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; /* * Note: drm_get_edid gets edid in the following order: @@ -6531,7 +6550,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) * 2) firmware EDID if set via edid_firmware module parameter * 3) regular DDC read. */ - edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc); + edid = drm_get_edid(connector, ddc); if (!edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); return; @@ -6572,12 +6591,18 @@ static int get_modes(struct drm_connector *connector) static void create_eml_sink(struct amdgpu_dm_connector *aconnector) { struct drm_connector *connector = &aconnector->base; - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(&aconnector->base); + struct dc_link *dc_link = aconnector->dc_link; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; /* * Note: drm_get_edid gets edid in the following order: @@ -6585,7 +6610,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) * 2) firmware EDID if set via edid_firmware module parameter * 3) regular DDC read. */ - edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc); + edid = drm_get_edid(connector, ddc); if (!edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); return; @@ -8291,6 +8316,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, fill_dc_dirty_rects(plane, old_plane_state, new_plane_state, new_crtc_state, &bundle->flip_addrs[planes_count], + acrtc_state->stream->link->psr_settings.psr_version == + DC_PSR_VERSION_SU_1, &dirty_rects_changed); /* @@ -8976,16 +9003,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) trace_amdgpu_dm_atomic_commit_tail_begin(state); - if (dm->dc->caps.ips_support) { - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - if (new_con_state->crtc && - new_con_state->crtc->state->active && - drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) { - dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); - break; - } - } - } + if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dm->dc, false); drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_dp_mst_atomic_wait_for_dependencies(state); @@ -9188,6 +9207,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * To fix this, DC should permit updating only stream properties. */ dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); + if (!dummy_updates) { + DRM_ERROR("Failed to allocate memory for dummy_updates.\n"); + continue; + } for (j = 0; j < status->plane_count; j++) dummy_updates[j].surface = status->plane_states[0]; @@ -10728,11 +10751,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } - ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); - if (ret) { - DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; - goto fail; + if (dc_resource_is_dsc_encoding_supported(dc)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; + goto fail; + } } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); @@ -11144,14 +11169,23 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (range->flags != 1) continue; - amdgpu_dm_connector->min_vfreq = range->min_vfreq; - amdgpu_dm_connector->max_vfreq = range->max_vfreq; - amdgpu_dm_connector->pixel_clock_mhz = - range->pixel_clock_mhz * 10; - connector->display_info.monitor_range.min_vfreq = range->min_vfreq; connector->display_info.monitor_range.max_vfreq = range->max_vfreq; + if (edid->revision >= 4) { + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ) + connector->display_info.monitor_range.min_vfreq += 255; + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ) + connector->display_info.monitor_range.max_vfreq += 255; + } + + amdgpu_dm_connector->min_vfreq = + connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = + connector->display_info.monitor_range.max_vfreq; + amdgpu_dm_connector->pixel_clock_mhz = + range->pixel_clock_mhz * 10; + break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 85b7f58a7f35..c27063305a13 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A): + case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1): DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; @@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->edid_hdmi = connector->display_info.is_hdmi; + apply_edid_quirks(edid_buf, edid_caps); + sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) return result; @@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; - apply_edid_quirks(edid_buf, edid_caps); - kfree(sads); kfree(sadb); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 58b880acb087..3390f0d8420a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -711,7 +711,7 @@ static inline int dm_irq_state(struct amdgpu_device *adev, { bool st; enum dc_irq_source irq_source; - + struct dc *dc = adev->dm.dc; struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id]; if (!acrtc) { @@ -729,6 +729,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev, st = (state == AMDGPU_IRQ_STATE_ENABLE); + if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dc, false); + dc_interrupt_set(adev->dm.dc, irq_source, st); return 0; } diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index f2dfa96f9ef5..39530b2ea495 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -94,7 +94,7 @@ static void calculate_bandwidth( const uint32_t s_high = 7; const uint32_t dmif_chunk_buff_margin = 1; - uint32_t max_chunks_fbc_mode; + uint32_t max_chunks_fbc_mode = 0; int32_t num_cursor_lines; int32_t i, j, k; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 960c4b4f6ddf..05f392501c0a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1850,19 +1850,21 @@ static enum bp_result get_firmware_info_v3_2( /* Vega12 */ smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); if (!smu_info_v3_2) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10; } else if (revision.minor == 3) { /* Vega20 */ smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); if (!smu_info_v3_3) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10; } @@ -2422,10 +2424,11 @@ static enum bp_result get_integrated_info_v11( info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); if (info_v11 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v11->gpucapinfo); /* @@ -2637,11 +2640,12 @@ static enum bp_result get_integrated_info_v2_1( info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); if (info_v2_1 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_1->gpucapinfo); /* @@ -2799,11 +2803,11 @@ static enum bp_result get_integrated_info_v2_2( info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); - if (info_v2_2 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_2->gpucapinfo); /* diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index a5489fe6875f..aa9fd1dc550a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta int i; for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) { + if (i >= VG_NUM_DCFCLK_DPM_LEVELS) + break; if (clock_table->SocVoltage[i] == voltage) return clock_table->DcfClocks[i]; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 9c660d1facc7..e64890259235 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -437,32 +437,32 @@ static struct wm_table ddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, } @@ -474,32 +474,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, } @@ -655,10 +655,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0; + uint32_t num_memps, num_fclk, num_dcfclk; int i; /* Determine min/max p-state values. */ - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS : + clock_table->NumMemPstatesEnabled; + for (i = 0; i < num_memps; i++) { uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) { @@ -670,7 +673,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk min_dram_speed_mts = max_dram_speed_mts; min_pstate = max_pstate; - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + for (i = 0; i < num_memps; i++) { uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) { @@ -699,9 +702,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ ASSERT(clock_table->NumDcfClkLevelsEnabled > 0); - max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled); + num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS : + clock_table->NumFclkLevelsEnabled; + max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk); - for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : + clock_table->NumDcfClkLevelsEnabled; + for (i = 0; i < num_dcfclk; i++) { int j; /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index aa7c02ba948e..2c424e435962 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3817,7 +3817,9 @@ static void commit_planes_for_stream(struct dc *dc, * programming has completed (we turn on phantom OTG in order * to complete the plane disable for phantom pipes). */ - dc->hwss.apply_ctx_to_hw(dc, context); + + if (dc->hwss.disable_phantom_streams) + dc->hwss.disable_phantom_streams(dc, context); } if (update_type != UPDATE_TYPE_FAST) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 88c6436b28b6..180ac47868c2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -291,11 +291,14 @@ void dc_state_destruct(struct dc_state *state) dc_stream_release(state->phantom_streams[i]); state->phantom_streams[i] = NULL; } + state->phantom_stream_count = 0; for (i = 0; i < state->phantom_plane_count; i++) { dc_plane_state_release(state->phantom_planes[i]); state->phantom_planes[i] = NULL; } + state->phantom_plane_count = 0; + state->stream_mask = 0; memset(&state->res_ctx, 0, sizeof(state->res_ctx)); memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg)); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5d7aa882416b..c9317ea0258e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -434,6 +434,7 @@ struct dc_config { bool EnableMinDispClkODM; bool enable_auto_dpm_test_logs; unsigned int disable_ips; + unsigned int disable_ips_in_vpb; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 2b79a0e5638e..363d522603a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -125,7 +125,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dc_context *dc_ctx; struct dmub_srv *dmub; enum dmub_status status; int i; @@ -133,6 +133,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, if (!dc_dmub_srv || !dc_dmub_srv->dmub) return false; + dc_ctx = dc_dmub_srv->ctx; dmub = dc_dmub_srv->dmub; for (i = 0 ; i < count; i++) { @@ -1161,7 +1162,7 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) { - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dc_context *dc_ctx; enum dmub_status status; if (!dc_dmub_srv || !dc_dmub_srv->dmub) @@ -1170,6 +1171,8 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation) return true; + dc_ctx = dc_dmub_srv->ctx; + if (wait) { if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { do { diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b08ccb8c68bc..9900dda2eef5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1034,6 +1034,7 @@ enum replay_FW_Message_type { Replay_Msg_Not_Support = -1, Replay_Set_Timing_Sync_Supported, Replay_Set_Residency_Frameupdate_Timer, + Replay_Set_Pseudo_VTotal, }; union replay_error_status { @@ -1089,6 +1090,10 @@ struct replay_settings { uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; /* Maximum link off frame count */ enum replay_link_off_frame_count_level link_off_frame_count_level; + /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */ + uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal; + /* Replay last pseudo vtotal set to DMUB */ + uint16_t last_pseudo_vtotal; }; /* To split out "global" and "per-panel" config settings. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index e8570060d007..5bca67407c5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -290,4 +290,5 @@ void dce_panel_cntl_construct( dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs; dce_panel_cntl->base.ctx = init_data->ctx; dce_panel_cntl->base.inst = init_data->inst; + dce_panel_cntl->base.pwrseq_inst = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c index e43f77c11c00..5f97a868ada3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c @@ -56,16 +56,13 @@ static void dpp3_enable_cm_block( static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base) { - enum dc_lut_mode mode; + enum dc_lut_mode mode = LUT_BYPASS; uint32_t state_mode; uint32_t lut_mode; struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode); - if (state_mode == 0) - mode = LUT_BYPASS; - if (state_mode == 2) {//Programmable RAM LUT REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode); if (lut_mode == 0) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c index ad0df1a72a90..9e96a3ace207 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c @@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct( dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs; dcn301_panel_cntl->base.ctx = init_data->ctx; dcn301_panel_cntl->base.inst = init_data->inst; + dcn301_panel_cntl->base.pwrseq_inst = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 03248422d6ff..281be20b1a10 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -154,8 +154,24 @@ void dcn31_panel_cntl_construct( struct dcn31_panel_cntl *dcn31_panel_cntl, const struct panel_cntl_init_data *init_data) { + uint8_t pwrseq_inst = 0xF; + dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; - dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst; + + switch (init_data->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); + ASSERT(false); + break; + } + + dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c index 501388014855..d761b0df2878 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c @@ -203,12 +203,12 @@ void dcn32_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; enc10->base.features = *enc_features; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.transmitter = init_data->transmitter; diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c index da94e5309fba..81e349d5835b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c @@ -184,8 +184,6 @@ void dcn35_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; @@ -240,6 +238,8 @@ void dcn35_link_encoder_construct( } enc10->base.features.flags.bits.HDMI_6GB_EN = 1; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; if (bp_funcs->get_connector_speed_cap_info) result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios, diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 6042a5a6a44f..59ade76ffb18 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -72,11 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9f37f717a1f8..a0a65e099104 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1112,7 +1112,7 @@ struct pipe_slice_table { struct pipe_ctx *pri_pipe; struct dc_plane_state *plane; int slice_count; - } mpc_combines[MAX_SURFACES]; + } mpc_combines[MAX_PLANES]; int mpc_combine_count; }; @@ -1288,7 +1288,7 @@ static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *contex return updated; } -static bool should_allow_odm_power_optimization(struct dc *dc, +static bool should_apply_odm_power_optimization(struct dc *dc, struct dc_state *context, struct vba_vars_st *v, int *split, bool *merge) { @@ -1392,9 +1392,12 @@ static void try_odm_power_optimization_and_revalidate( { int i; unsigned int new_vlevel; + unsigned int cur_policy[MAX_PIPES]; - for (i = 0; i < pipe_cnt; i++) + for (i = 0; i < pipe_cnt; i++) { + cur_policy[i] = pipes[i].pipe.dest.odm_combine_policy; pipes[i].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + } new_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); @@ -1403,6 +1406,9 @@ static void try_odm_power_optimization_and_revalidate( memset(merge, 0, MAX_PIPES * sizeof(bool)); *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, new_vlevel, split, merge); context->bw_ctx.dml.vba.VoltageLevel = *vlevel; + } else { + for (i = 0; i < pipe_cnt; i++) + pipes[i].pipe.dest.odm_combine_policy = cur_policy[i]; } } @@ -1580,7 +1586,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, } } - if (should_allow_odm_power_optimization(dc, context, vba, split, merge)) + if (should_apply_odm_power_optimization(dc, context, vba, split, merge)) try_odm_power_optimization_and_revalidate( dc, context, pipes, split, merge, vlevel, *pipe_cnt); @@ -2209,7 +2215,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, int i; pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); + if (!dc->config.enable_windowed_mpo_odm) + dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); /* repopulate_pipes = 1 means the pipes were either split or merged. In this case * we have to re-calculate the DET allocation and run through DML once more to diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 475c4ec43c01..7ea2bd5374d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -164,8 +164,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .sr_exit_z8_time_us = 210.0, .sr_enter_plus_exit_z8_time_us = 320.0, .fclk_change_latency_us = 24.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 64d01a9cd68c..1ba6933d2b36 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,9 +341,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } - if (dml2->config.bbox_overrides.clks_table.num_states) - p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; - /* Override from passed values, if available */ for (i = 0; i < p->in_states->num_states; i++) { if (dml2->config.bbox_overrides.sr_exit_latency_us) { @@ -400,7 +397,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { - + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; } @@ -439,6 +436,14 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } dml2_policy_build_synthetic_soc_states(s, p); + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + // Override last out_state with data from last in_state + // This will ensure that out_state contains max fclk + memcpy(&p->out_states->state_array[p->out_states->num_states - 1], + &p->in_states->state_array[p->in_states->num_states - 1], + sizeof(struct soc_state_bounding_box_st)); + } } void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out) @@ -793,35 +798,28 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -/*TODO no support for mpc combine, need rework - should calculate scaling params based on plane+stream*/ -static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, const struct dc_state *context) +static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context) { int i; - struct scaler_data data = { 0 }; + struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); for (i = 0; i < MAX_PIPES; i++) { const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->plane_state == in && !pipe->prev_odm_pipe) { - const struct pipe_ctx *next_pipe = pipe->next_odm_pipe; - - data = context->res_ctx.pipe_ctx[i].plane_res.scl_data; - while (next_pipe) { - data.h_active += next_pipe->plane_res.scl_data.h_active; - data.recout.width += next_pipe->plane_res.scl_data.recout.width; - if (in->rotation == ROTATION_ANGLE_0 || in->rotation == ROTATION_ANGLE_180) { - data.viewport.width += next_pipe->plane_res.scl_data.viewport.width; - } else { - data.viewport.height += next_pipe->plane_res.scl_data.viewport.height; - } - next_pipe = next_pipe->next_odm_pipe; - } + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + + resource_build_scaling_params(temp_pipe); break; } } ASSERT(i < MAX_PIPES); - return data; + return temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) @@ -866,7 +864,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, const struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) { const struct scaler_data scaler_data = get_scaler_data_for_plane(in, context); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 26307e599614..2a58a7687bdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2, in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; } for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5660f15da291..01493c49bd7a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1183,9 +1183,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; if (dccg) { - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, @@ -1476,7 +1476,7 @@ static enum dc_status dce110_enable_stream_timing( return DC_OK; } -static enum dc_status apply_single_controller_ctx_to_hw( +enum dc_status dce110_apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc) @@ -2302,7 +2302,7 @@ enum dc_status dce110_apply_ctx_to_hw( if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe) continue; - status = apply_single_controller_ctx_to_hw( + status = dce110_apply_single_controller_ctx_to_hw( pipe_ctx, context, dc); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h index 08028a1779ae..ed3cc3648e8e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h @@ -39,6 +39,10 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context); +enum dc_status dce110_apply_single_controller_ctx_to_hw( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc); void dce110_enable_stream(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index e931342fcf4c..931ac8ed7069 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2561,7 +2561,7 @@ void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx) tg->funcs->setup_vertical_interrupt2(tg, start_line); } -static void dcn20_reset_back_end_for_pipe( +void dcn20_reset_back_end_for_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) @@ -2790,18 +2790,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) } if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { - dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); - - phyd32clk = get_phyd32clk_src(link); - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); - dto_params.otg_inst = tg->inst; dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); + + phyd32clk = get_phyd32clk_src(link); + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); } else { if (dccg->funcs->enable_symclk_se) dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h index b94c85340abf..d950b3e54ec2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h @@ -84,6 +84,10 @@ enum dc_status dcn20_enable_stream_timing( void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn20_reset_back_end_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); void dcn20_init_blank( struct dc *dc, struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 8e88dcaf88f5..7252f5f781f0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -206,28 +206,32 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) { struct abm *abm = pipe_ctx->stream_res.abm; - uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; + struct timing_generator *tg = pipe_ctx->stream_res.tg; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; + uint32_t otg_inst; + + if (!abm || !tg || !panel_cntl) + return; + + otg_inst = tg->inst; if (dmcu) { dce110_set_pipe(pipe_ctx); return; } - if (abm && panel_cntl) { - if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, + if (abm->funcs && abm->funcs->set_pipe_ex) { + abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst, panel_cntl->pwrseq_inst); - } else { - dmub_abm_set_pipe(abm, otg_inst, - SET_ABM_PIPE_NORMAL, - panel_cntl->inst, - panel_cntl->pwrseq_inst); - } + } else { + dmub_abm_set_pipe(abm, otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } } @@ -237,34 +241,35 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, { struct dc_context *dc = pipe_ctx->stream->ctx; struct abm *abm = pipe_ctx->stream_res.abm; + struct timing_generator *tg = pipe_ctx->stream_res.tg; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; + uint32_t otg_inst; + + if (!abm || !tg || !panel_cntl) + return false; + + otg_inst = tg->inst; if (dc->dc->res_pool->dmcu) { dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp); return true; } - if (abm != NULL) { - uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; - - if (abm && panel_cntl) { - if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, - otg_inst, - SET_ABM_PIPE_NORMAL, - panel_cntl->inst, - panel_cntl->pwrseq_inst); - } else { - dmub_abm_set_pipe(abm, - otg_inst, - SET_ABM_PIPE_NORMAL, - panel_cntl->inst, - panel_cntl->pwrseq_inst); - } - } + if (abm->funcs && abm->funcs->set_pipe_ex) { + abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); + } else { + dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } - if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm) + if (abm->funcs && abm->funcs->set_backlight_level_pwm) abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16, frame_ramp, 0, panel_cntl->inst); else diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6c9299c7683d..aa36d7a56ca8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1474,9 +1474,44 @@ void dcn32_update_dsc_pg(struct dc *dc, } } +void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context) +{ + struct dce_hwseq *hws = dc->hwseq; + int i; + + for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) { + struct pipe_ctx *pipe_ctx_old = + &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (dc_state_get_pipe_subvp_type(dc->current_state, pipe_ctx_old) != SUBVP_PHANTOM) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || pipe_need_reprogram(pipe_ctx_old, pipe_ctx) || + (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)) { + struct clock_source *old_clk = pipe_ctx_old->clock_source; + + if (hws->funcs.reset_back_end_for_pipe) + hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); + if (hws->funcs.enable_stream_gating) + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); + if (old_clk) + old_clk->funcs->cs_power_down(old_clk); + } + } +} + void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) { unsigned int i; + enum dc_status status = DC_OK; + struct dce_hwseq *hws = dc->hwseq; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -1497,16 +1532,39 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) } } for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - - if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) { - // If old context or new context has phantom pipes, apply - // the phantom timings now. We can't change the phantom - // pipe configuration safely without driver acquiring - // the DMCUB lock first. - dc->hwss.apply_ctx_to_hw(dc, context); - break; + struct pipe_ctx *pipe_ctx_old = + &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream == NULL) + continue; + + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) + continue; + + if (pipe_ctx->stream == pipe_ctx_old->stream && + pipe_ctx->stream->link->link_state_valid) { + continue; } + + if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) + continue; + + if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe) + continue; + + if (hws->funcs.apply_single_controller_ctx_to_hw) + status = hws->funcs.apply_single_controller_ctx_to_hw( + pipe_ctx, + context, + dc); + + ASSERT(status == DC_OK); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (hws->funcs.resync_fifo_dccg_dio) + hws->funcs.resync_fifo_dccg_dio(hws, dc, context); +#endif } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index cecf7f0f5671..069e20bc87c0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -111,6 +111,8 @@ void dcn32_update_dsc_pg(struct dc *dc, void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context); +void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context); + void dcn32_init_blank( struct dc *dc, struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 427cfc8c24a4..e8ac94a005b8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -109,6 +109,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .get_dcc_en_bits = dcn10_get_dcc_en_bits, .commit_subvp_config = dcn32_commit_subvp_config, .enable_phantom_streams = dcn32_enable_phantom_streams, + .disable_phantom_streams = dcn32_disable_phantom_streams, .subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast, @@ -159,6 +160,8 @@ static const struct hwseq_private_funcs dcn32_private_funcs = { .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, .resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio, .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, + .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, }; void dcn32_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 9c806385ecbd..8b6c49622f3b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -680,7 +680,7 @@ void dcn35_power_down_on_boot(struct dc *dc) bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) { struct dc_link *edp_links[MAX_NUM_EDP]; - int edp_num; + int i, edp_num; if (dc->debug.dmcub_emulation) return true; @@ -688,6 +688,13 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) dc_get_edp_links(dc, edp_links, &edp_num); if (edp_num == 0 || edp_num > 1) return false; + + for (i = 0; i < dc->current_state->stream_count; ++i) { + struct dc_stream_state *stream = dc->current_state->streams[i]; + + if (!stream->dpms_off && !dc_is_embedded_signal(stream->signal)) + return false; + } } // TODO: review other cases when idle optimization is allowed diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index a54399383318..64ca7c66509b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -379,6 +379,7 @@ struct hw_sequencer_funcs { struct dc_cursor_attributes *cursor_attr); void (*commit_subvp_config)(struct dc *dc, struct dc_state *context); void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context); + void (*disable_phantom_streams)(struct dc *dc, struct dc_state *context); void (*subvp_pipe_control_lock)(struct dc *dc, struct dc_state *context, bool lock, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 6137cf09aa54..b3c62a82cb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -165,8 +165,15 @@ struct hwseq_private_funcs { void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx); void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); + enum dc_status (*apply_single_controller_ctx_to_hw)( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc); bool (*is_dp_dig_pixel_rate_div_policy)(struct pipe_ctx *pipe_ctx); #endif + void (*reset_back_end_for_pipe)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); }; struct dce_hwseq { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index f74ae0d41d3c..3a6bf77a6873 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -469,6 +469,8 @@ struct resource_context { unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; bool is_mpc_3dlut_acquired[MAX_PIPES]; + /* solely used for build scalar data in dml2 */ + struct pipe_ctx temp_pipe; }; struct dce_bw_output { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 5dcbaa2db964..e97d964a1791 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -57,7 +57,7 @@ struct panel_cntl_funcs { struct panel_cntl_init_data { struct dc_context *ctx; uint32_t inst; - uint32_t pwrseq_inst; + uint32_t eng_id; }; struct panel_cntl { diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index c958ef37b78a..77a60aa9f27b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -427,22 +427,18 @@ struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe); int resource_get_mpc_slice_index(const struct pipe_ctx *dpp_pipe); /* - * Get number of MPC "cuts" of the plane associated with the pipe. MPC slice - * count is equal to MPC splits + 1. For example if a plane is cut 3 times, it - * will have 4 pieces of slice. - * return - 0 if pipe is not used for a plane with MPCC combine. otherwise - * the number of MPC "cuts" for the plane. + * Get the number of MPC slices associated with the pipe. + * The function returns 0 if the pipe is not associated with an MPC combine + * pipe topology. */ -int resource_get_mpc_slice_count(const struct pipe_ctx *opp_head); +int resource_get_mpc_slice_count(const struct pipe_ctx *pipe); /* - * Get number of ODM "cuts" of the timing associated with the pipe. ODM slice - * count is equal to ODM splits + 1. For example if a timing is cut 3 times, it - * will have 4 pieces of slice. - * return - 0 if pipe is not used for ODM combine. otherwise - * the number of ODM "cuts" for the timing. + * Get the number of ODM slices associated with the pipe. + * The function returns 0 if the pipe is not associated with an ODM combine + * pipe topology. */ -int resource_get_odm_slice_count(const struct pipe_ctx *otg_master); +int resource_get_odm_slice_count(const struct pipe_ctx *pipe); /* Get the ODM slice index counting from 0 from left most slice */ int resource_get_odm_slice_index(const struct pipe_ctx *opp_head); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 37d3027c32dc..cf22b8f28ba6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -370,30 +370,6 @@ static enum transmitter translate_encoder_to_transmitter( } } -static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link) -{ - uint8_t pwrseq_inst = 0xF; - struct dc_context *dc_ctx = link->dc->ctx; - - DC_LOGGER_INIT(dc_ctx->logger); - - switch (link->eng_id) { - case ENGINE_ID_DIGA: - pwrseq_inst = 0; - break; - case ENGINE_ID_DIGB: - pwrseq_inst = 1; - break; - default: - DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id); - ASSERT(false); - break; - } - - return pwrseq_inst; -} - - static void link_destruct(struct dc_link *link) { int i; @@ -657,7 +633,7 @@ static bool construct_phy(struct dc_link *link, link->link_id.id == CONNECTOR_ID_LVDS)) { panel_cntl_init_data.ctx = dc_ctx; panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count; - panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link); + panel_cntl_init_data.eng_id = link->eng_id; link->panel_cntl = link->dc->res_pool->funcs->panel_cntl_create( &panel_cntl_init_data); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 8fe66c367850..5b0bc7f6a188 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; int num_dpias = 0; - for (uint8_t i = 0; i < num_streams; ++i) { + for (unsigned int i = 0; i < num_streams; ++i) { if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { /* new dpia sst stream, check whether it exceeds max dpia */ if (num_dpias >= MAX_DPIA_NUM) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index dd0d2b206462..5491b707cec8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -196,7 +196,7 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { + for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) { if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 5a0b04518956..16a62e018712 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status( { enum link_training_result status = LINK_TRAINING_SUCCESS; union lane_status lane_status; + union lane_align_status_updated dpcd_lane_status_updated; uint8_t dpcd_buf[6] = {0}; uint32_t lane; @@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status( * check lanes status */ lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane); + dpcd_lane_status_updated.raw = dpcd_buf[4]; if (!lane_status.bits.CHANNEL_EQ_DONE_0 || !lane_status.bits.CR_DONE_0 || - !lane_status.bits.SYMBOL_LOCKED_0) { + !lane_status.bits.SYMBOL_LOCKED_0 || + !dp_is_interlane_aligned(dpcd_lane_status_updated)) { /* if one of the channel equalization, clock * recovery or symbol lock is dropped * consider it as (link has been diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index e8dda44b23cb..5d36bab0029c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -619,7 +619,7 @@ static enum link_training_result dpia_training_eq_non_transparent( uint32_t retries_eq = 0; enum dc_status status; enum dc_dp_training_pattern tr_pattern; - uint32_t wait_time_microsec; + uint32_t wait_time_microsec = 0; enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; union lane_align_status_updated dpcd_lane_status_updated = {0}; union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c index 5c9a30211c10..fc50931c2aec 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c @@ -205,7 +205,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -234,7 +234,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 511ff6b5b985..7538b548c572 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -999,7 +999,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id vpg = dcn301_vpg_create(ctx, vpg_inst); afmt = dcn301_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index c4d71e7f18af..6f10052caeef 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1829,7 +1829,21 @@ int dcn32_populate_dml_pipes_from_context( dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + if (dc->config.enable_windowed_mpo_odm && + dc->debug.enable_single_display_2to1_odm_policy) { + switch (resource_get_odm_slice_count(pipe)) { + case 2: + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + break; + case 4: + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1; + break; + default: + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + } + } else { + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + } pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 761ec9891875..5fdcda8f8602 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -780,8 +780,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400, + .ips2_eval_delay_us = 2000, + .ips2_entry_delay_us = 800, .static_screen_wait_frames = 2, }; @@ -2130,6 +2130,7 @@ static bool dcn35_resource_construct( dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; dc->dml2_options.use_native_pstate_optimization = true; dc->dml2_options.use_native_soc_bb_construction = true; + dc->dml2_options.minimize_dispclk_using_odm = false; if (dc->config.EnableMinDispClkODM) dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c64b6c848ef7..e699731ee68e 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2832,6 +2832,7 @@ struct dmub_rb_cmd_psr_set_power_opt { #define REPLAY_RESIDENCY_MODE_MASK (0x1 << REPLAY_RESIDENCY_MODE_SHIFT) # define REPLAY_RESIDENCY_MODE_PHY (0x0 << REPLAY_RESIDENCY_MODE_SHIFT) # define REPLAY_RESIDENCY_MODE_ALPM (0x1 << REPLAY_RESIDENCY_MODE_SHIFT) +# define REPLAY_RESIDENCY_MODE_IPS 0x10 #define REPLAY_RESIDENCY_ENABLE_MASK (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT) # define REPLAY_RESIDENCY_DISABLE (0x0 << REPLAY_RESIDENCY_ENABLE_SHIFT) @@ -2894,6 +2895,10 @@ enum dmub_cmd_replay_type { * Set Residency Frameupdate Timer. */ DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6, + /** + * Set pseudo vtotal + */ + DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL = 7, }; /** @@ -3077,6 +3082,26 @@ struct dmub_cmd_replay_set_timing_sync_data { }; /** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ +struct dmub_cmd_replay_set_pseudo_vtotal { + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * Source Vtotal that Replay + IPS + ABM full screen video src vtotal + */ + uint16_t vtotal; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad; +}; + +/** * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command. */ struct dmub_rb_cmd_replay_set_power_opt { @@ -3157,6 +3182,20 @@ struct dmub_rb_cmd_replay_set_timing_sync { }; /** + * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ +struct dmub_rb_cmd_replay_set_pseudo_vtotal { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ + struct dmub_cmd_replay_set_pseudo_vtotal data; +}; + +/** * Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. */ struct dmub_cmd_replay_frameupdate_timer_data { @@ -3207,6 +3246,10 @@ union dmub_replay_cmd_set { * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data. */ struct dmub_cmd_replay_frameupdate_timer_data timer_data; + /** + * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command data. + */ + struct dmub_cmd_replay_set_pseudo_vtotal pseudo_vtotal_data; }; /** @@ -4358,6 +4401,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. */ struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer; + /** + * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ + struct dmub_rb_cmd_replay_set_pseudo_vtotal replay_set_pseudo_vtotal; }; /** diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index ad98e504c00d..e304e8435fb8 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -980,6 +980,11 @@ void set_replay_coasting_vtotal(struct dc_link *link, link->replay_settings.coasting_vtotal_table[type] = vtotal; } +void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) +{ + link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal; +} + void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal) { diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index c17bbc6fb38c..bef4815e1703 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -57,6 +57,7 @@ void init_replay_config(struct dc_link *link, struct replay_config *pr_config); void set_replay_coasting_vtotal(struct dc_link *link, enum replay_coasting_vtotal_type type, uint16_t vtotal); +void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal); diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1dc5dd9b7bf7..df2c7ffe190f 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -258,6 +258,7 @@ enum DC_DEBUG_MASK { DC_ENABLE_DML2 = 0x100, DC_DISABLE_PSR_SU = 0x200, DC_DISABLE_REPLAY = 0x400, + DC_DISABLE_IPS = 0x800, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h index be519c8edf49..335980e2afbf 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h +++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h @@ -138,7 +138,7 @@ static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size, } #define amdgpu_asic_get_reg_state_supported(adev) \ - ((adev)->asic_funcs->get_reg_state ? 1 : 0) + (((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? 1 : 0) #define amdgpu_asic_get_reg_state(adev, state, buf, size) \ ((adev)->asic_funcs->get_reg_state ? \ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 087d57850304..39c5e1dfa275 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2558,6 +2558,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); int err, ret; + u32 pwm_mode; int value; if (amdgpu_in_reset(adev)) @@ -2569,13 +2570,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, if (err) return err; + if (value == 0) + pwm_mode = AMD_FAN_CTRL_NONE; + else if (value == 1) + pwm_mode = AMD_FAN_CTRL_MANUAL; + else if (value == 2) + pwm_mode = AMD_FAN_CTRL_AUTO; + else + return -EINVAL; + ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (ret < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return ret; } - ret = amdgpu_dpm_set_fan_control_mode(adev, value); + ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index df4f20293c16..eb4da3666e05 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c16703868e5c..0ad947df777a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/pci.h> +#include <linux/power_supply.h> #include <linux/reboot.h> #include "amdgpu.h" @@ -733,7 +734,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_NONE; + smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -817,16 +818,8 @@ static int smu_late_init(void *handle) * handle the switch automatically. Driver involvement * is unnecessary. */ - if (!smu->dc_controlled_by_gpio) { - ret = smu_set_power_source(smu, - adev->pm.ac_power ? SMU_POWER_SOURCE_AC : - SMU_POWER_SOURCE_DC); - if (ret) { - dev_err(adev->dev, "Failed to switch to %s mode!\n", - adev->pm.ac_power ? "AC" : "DC"); - return ret; - } - } + adev->pm.ac_power = power_supply_is_system_supplied() > 0; + smu_set_ac_dc(smu); if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 1)) || (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 3))) @@ -1961,31 +1954,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } -static int smu_reset_mp1_state(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - - if ((!adev->in_runpm) && (!adev->in_suspend) && - (!amdgpu_in_reset(adev))) - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); - break; - default: - break; - } - - return ret; -} - static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -2003,15 +1975,7 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - ret = smu_smc_hw_cleanup(smu); - if (ret) - return ret; - - ret = smu_reset_mp1_state(smu); - if (ret) - return ret; - - return 0; + return smu_smc_hw_cleanup(smu); } static void smu_late_fini(void *handle) @@ -2710,6 +2674,7 @@ int smu_get_power_limit(void *handle, case SMU_PPT_LIMIT_CURRENT: switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 0, 12): diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 2aa4fea87314..66e84defd0b6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -424,7 +424,6 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, - SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 4cd43bbec910..bcad42534da4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 8d1d29ffb0f1..ed189a3878eb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (smu->od_enabled && - navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) { + navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 21fc033528fa..e2ad2b972ab0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5a314d0316c1..c7bfa68bf00f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1442,10 +1442,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 771a3d457c33..c486182ff275 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1379,10 +1379,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9b25faa63e4..9b80f18ea6c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2357,6 +2357,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; uint32_t power_limit, od_percent_upper, od_percent_lower; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2368,19 +2369,18 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); if (max_power_limit) { - *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit = msg_limit * (100 + od_percent_upper); *max_power_limit /= 100; } @@ -2747,13 +2747,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_PrepareMp1ForUnload, - 0x55, NULL); - - if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) - ret = smu_v13_0_disable_pmfw_state(smu); - + ret = smu_cmn_set_mp1_state(smu, mp1_state); break; default: /* Ignore others */ @@ -2949,7 +2943,7 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 0): return smu->smc_fw_version >= 0x004e6300; case IP_VERSION(13, 0, 10): @@ -2959,6 +2953,55 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) } } +static int smu_v13_0_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_0_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_0_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -3013,7 +3056,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .set_fan_control_mode = smu_v13_0_set_fan_control_mode, .enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_0_get_power_limit, - .set_power_limit = smu_v13_0_set_power_limit, + .set_power_limit = smu_v13_0_0_set_power_limit, .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_0_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_0_set_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3c98a8a0386a..7e1941cf1796 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -160,8 +160,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1), MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1), - MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 0), - MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 0), + MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1), + MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0), MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0), MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0), diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 59606a19e3d2..3dc7b60cb075 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2321,6 +2321,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; uint32_t power_limit, od_percent_upper, od_percent_lower; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2332,19 +2333,18 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); if (max_power_limit) { - *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit = msg_limit * (100 + od_percent_upper); *max_power_limit /= 100; } @@ -2504,13 +2504,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_PrepareMp1ForUnload, - 0x55, NULL); - - if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) - ret = smu_v13_0_disable_pmfw_state(smu); - + ret = smu_cmn_set_mp1_state(smu, mp1_state); break; default: /* Ignore others */ @@ -2545,6 +2539,55 @@ static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu) return smu->smc_fw_version > 0x00524600; } +static int smu_v13_0_7_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_7_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_7_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -2596,7 +2639,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_fan_control_mode = smu_v13_0_set_fan_control_mode, .enable_mgpu_fan_boost = smu_v13_0_7_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_7_get_power_limit, - .set_power_limit = smu_v13_0_set_power_limit, + .set_power_limit = smu_v13_0_7_set_power_limit, .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_7_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_7_set_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 4894f7ee737b..6dae5ad74ff0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -229,8 +229,6 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2; break; case IP_VERSION(14, 0, 0): - if ((smu->smc_fw_version < 0x5d3a00)) - dev_warn(smu->adev->dev, "The PMFW version(%x) is behind in this BIOS!\n", smu->smc_fw_version); smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0; break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 47fdbae4adfc..9310c4758e38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -261,7 +261,10 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->MpipuclkFrequency; break; case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->GfxActivity / 100; + if ((smu->smc_fw_version > 0x5d4600)) + *value = metrics->GfxActivity; + else + *value = metrics->GfxActivity / 100; break; case METRICS_AVERAGE_VCNACTIVITY: *value = metrics->VcnActivity / 100; diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index ef31033439bc..29d91493b101 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1762,6 +1762,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, u8 request = msg->request & ~DP_AUX_I2C_MOT; int ret = 0; + mutex_lock(&ctx->aux_lock); pm_runtime_get_sync(dev); msg->reply = 0; switch (request) { @@ -1778,6 +1779,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, msg->size, msg->buffer); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ctx->aux_lock); return ret; } @@ -2474,7 +2476,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, ctx->connector = NULL; anx7625_dp_stop(ctx); - pm_runtime_put_sync(dev); + mutex_lock(&ctx->aux_lock); + pm_runtime_put_sync_suspend(dev); + mutex_unlock(&ctx->aux_lock); } static enum drm_connector_status @@ -2668,6 +2672,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) mutex_init(&platform->lock); mutex_init(&platform->hdcp_wq_lock); + mutex_init(&platform->aux_lock); INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func); platform->hdcp_workqueue = create_workqueue("hdcp workqueue"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 66ebee7f3d83..39ed35d33836 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -475,6 +475,8 @@ struct anx7625_data { struct workqueue_struct *hdcp_workqueue; /* Lock for hdcp work queue */ struct mutex hdcp_wq_lock; + /* Lock for aux transfer and disable */ + struct mutex aux_lock; char edid_block; struct display_timing dt; u8 display_timing_valid; diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index bb55f697a181..6886db2d9e00 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -25,20 +25,18 @@ static void drm_aux_hpd_bridge_release(struct device *dev) ida_free(&drm_aux_hpd_bridge_ida, adev->id); of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); kfree(adev); } -static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +static void drm_aux_hpd_bridge_free_adev(void *_adev) { - struct auxiliary_device *adev = _adev; - - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); + auxiliary_device_uninit(_adev); } /** - * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge * @parent: device instance providing this bridge * @np: device node pointer corresponding to this bridge instance * @@ -46,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev) * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is * able to send the HPD events. * - * Return: device instance that will handle created bridge or an error code - * encoded into the pointer. + * Return: bridge auxiliary device pointer or an error pointer */ -struct device *drm_dp_hpd_bridge_register(struct device *parent, - struct device_node *np) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np) { struct auxiliary_device *adev; int ret; @@ -74,18 +70,62 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); ida_free(&drm_aux_hpd_bridge_ida, adev->id); kfree(adev); return ERR_PTR(ret); } - ret = auxiliary_device_add(adev); - if (ret) { - auxiliary_device_uninit(adev); + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev); + if (ret) return ERR_PTR(ret); - } - ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + return adev; +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc); + +static void drm_aux_hpd_bridge_del_adev(void *_adev) +{ + auxiliary_device_delete(_adev); +} + +/** + * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge + * @dev: struct device to tie registration lifetime to + * @adev: bridge auxiliary device to be registered + * + * Returns: zero on success or a negative errno + */ +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev) +{ + int ret; + + ret = auxiliary_device_add(adev); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev); +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add); + +/** + * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Return: device instance that will handle created bridge or an error pointer + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = devm_drm_dp_hpd_bridge_alloc(parent, np); + if (IS_ERR(adev)) + return ERR_CAST(adev); + + ret = devm_drm_dp_hpd_bridge_add(parent, adev); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 541e4f5afc4c..14d4dcf239da 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -107,6 +107,7 @@ struct ps8640 { struct device_link *link; bool pre_enabled; bool need_post_hpd_delay; + struct mutex aux_lock; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -345,11 +346,20 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; int ret; + mutex_lock(&ps_bridge->aux_lock); pm_runtime_get_sync(dev); + ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); + if (ret) { + pm_runtime_put_sync_suspend(dev); + goto exit; + } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); +exit: + mutex_unlock(&ps_bridge->aux_lock); + return ret; } @@ -470,7 +480,18 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge, ps_bridge->pre_enabled = false; ps8640_bridge_vdo_control(ps_bridge, DISABLE); + + /* + * The bridge seems to expect everything to be power cycled at the + * disable process, so grab a lock here to make sure + * ps8640_aux_transfer() is not holding a runtime PM reference and + * preventing the bridge from suspend. + */ + mutex_lock(&ps_bridge->aux_lock); + pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev); + + mutex_unlock(&ps_bridge->aux_lock); } static int ps8640_bridge_attach(struct drm_bridge *bridge, @@ -619,6 +640,8 @@ static int ps8640_probe(struct i2c_client *client) if (!ps_bridge) return -ENOMEM; + mutex_init(&ps_bridge->aux_lock); + ps_bridge->supplies[0].supply = "vdd12"; ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index be5914caa17d..63a1a0c88be4 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -969,10 +969,6 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); reg &= ~DSIM_STOP_STATE_CNT_MASK; reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]); - - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - reg |= DSIM_FORCE_STOP_STATE; - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff); @@ -1431,18 +1427,6 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi) disable_irq(dsi->irq); } -static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable) -{ - u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); - - if (enable) - reg |= DSIM_FORCE_STOP_STATE; - else - reg &= ~DSIM_FORCE_STOP_STATE; - - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); -} - static int samsung_dsim_init(struct samsung_dsim *dsi) { const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; @@ -1492,9 +1476,6 @@ static void samsung_dsim_atomic_pre_enable(struct drm_bridge *bridge, ret = samsung_dsim_init(dsi); if (ret) return; - - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); } } @@ -1503,12 +1484,8 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge, { struct samsung_dsim *dsi = bridge_to_dsi(bridge); - if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) { - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); - } else { - samsung_dsim_set_stop_state(dsi, false); - } + samsung_dsim_set_display_mode(dsi); + samsung_dsim_set_display_enable(dsi, true); dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1521,9 +1498,6 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge, if (!(dsi->state & DSIM_STATE_ENABLED)) return; - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - samsung_dsim_set_stop_state(dsi, true); - dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1828,8 +1802,6 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host, if (ret) return ret; - samsung_dsim_set_stop_state(dsi, false); - ret = mipi_dsi_create_packet(&xfer.packet, msg); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 2bdc5b439beb..4560ae9cbce1 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1080,6 +1080,26 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } + ret = sii902x_audio_codec_init(sii902x, dev); + if (ret) + return ret; + + i2c_set_clientdata(sii902x->i2c, sii902x); + + sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, + 1, 0, I2C_MUX_GATE, + sii902x_i2c_bypass_select, + sii902x_i2c_bypass_deselect); + if (!sii902x->i2cmux) { + ret = -ENOMEM; + goto err_unreg_audio; + } + + sii902x->i2cmux->priv = sii902x; + ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + if (ret) + goto err_unreg_audio; + sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; sii902x->bridge.timings = &default_sii902x_timings; @@ -1090,19 +1110,13 @@ static int sii902x_init(struct sii902x *sii902x) drm_bridge_add(&sii902x->bridge); - sii902x_audio_codec_init(sii902x, dev); - - i2c_set_clientdata(sii902x->i2c, sii902x); + return 0; - sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, - 1, 0, I2C_MUX_GATE, - sii902x_i2c_bypass_select, - sii902x_i2c_bypass_deselect); - if (!sii902x->i2cmux) - return -ENOMEM; +err_unreg_audio: + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); - sii902x->i2cmux->priv = sii902x; - return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + return ret; } static int sii902x_probe(struct i2c_client *client) @@ -1170,12 +1184,14 @@ static int sii902x_probe(struct i2c_client *client) } static void sii902x_remove(struct i2c_client *client) - { struct sii902x *sii902x = i2c_get_clientdata(client); - i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); + i2c_mux_del_adapters(sii902x->i2cmux); + + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); } static const struct of_device_id sii902x_dt_ids[] = { diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index bd6c24d4213c..f7c6b60629c2 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5491,6 +5491,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); * - 0 if the new state is valid * - %-ENOSPC, if the new state is invalid, because of BW limitation * @failing_port is set to: + * * - The non-root port where a BW limit check failed * with all the ports downstream of @failing_port passing * the BW limit check. @@ -5499,6 +5500,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); * - %NULL if the BW limit check failed at the root port * with all the ports downstream of the root port passing * the BW limit check. + * * - %-EINVAL, if the new state is invalid, because the root port has * too many payloads. */ diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index f57e6d74fb0e..5ebdd6f8f36e 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* @@ -538,7 +548,13 @@ static int __alloc_range(struct drm_buddy *mm, list_add(&block->left->tmp_link, dfs); } while (1); + if (total_allocated < size) { + err = -ENOSPC; + goto err_free; + } + list_splice_tail(&allocated, blocks); + return 0; err_undo: @@ -755,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return -EINVAL; /* Actual range allocation */ - if (start + size == end) + if (start + size == end) { + if (!IS_ALIGNED(start | end, min_block_size)) + return -EINVAL; + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + } original_size = size; original_min_size = min_block_size; diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index cb90e70d85e8..65f9f66933bb 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -904,6 +904,7 @@ out: connector_set = NULL; fb = NULL; mode = NULL; + num_connectors = 0; DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 834a5e28abbe..7352bde299d5 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 3f479483d7d8..23b4e9a3361d 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -760,9 +760,11 @@ static void output_poll_execute(struct work_struct *work) changed = dev->mode_config.delayed_event; dev->mode_config.delayed_event = false; - if (!drm_kms_helper_poll && dev->mode_config.poll_running) { - drm_kms_helper_disable_hpd(dev); - dev->mode_config.poll_running = false; + if (!drm_kms_helper_poll) { + if (dev->mode_config.poll_running) { + drm_kms_helper_disable_hpd(dev); + dev->mode_config.poll_running = false; + } goto out; } diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 84101baeecc6..a6c19de46292 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1040,7 +1040,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1109,7 +1110,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } @@ -1416,10 +1418,21 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, /* This happens inside the syncobj lock */ fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); + if (!fence) + return; + ret = dma_fence_chain_find_seqno(&fence, entry->point); - if (ret != 0 || !fence) { + if (ret != 0) { + /* The given seqno has not been submitted yet. */ dma_fence_put(fence); return; + } else if (!fence) { + /* If dma_fence_chain_find_seqno returns 0 but sets the fence + * to NULL, it implies that the given seqno is signaled and a + * later seqno has already been submitted. Assign a stub fence + * so that the eventfd still gets signaled below. + */ + fence = dma_fence_get_stub(); } list_del_init(&entry->node); diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 776f2f0b602d..0ef7bc8848b0 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win, static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, struct drm_framebuffer *fb) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); unsigned int alpha = state->base.alpha; unsigned int pixel_alpha; unsigned long val; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a9f1c5c05894..f2145227a1e0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -480,7 +480,7 @@ static void fimd_commit(struct exynos_drm_crtc *crtc) struct fimd_context *ctx = crtc->ctx; struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; const struct fimd_driver_data *driver_data = ctx->driver_data; - void *timing_base = ctx->regs + driver_data->timing_base; + void __iomem *timing_base = ctx->regs + driver_data->timing_base; u32 val; if (ctx->suspended) @@ -661,9 +661,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win, static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, struct drm_framebuffer *fb, int width) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); uint32_t pixel_format = fb->format->format; unsigned int alpha = state->base.alpha; u32 val = WINCONx_ENWIN; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index e9a769590415..180507a47700 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1341,7 +1341,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev) for (i = 0; i < ctx->num_clocks; i++) { ret = clk_prepare_enable(ctx->clocks[i]); if (ret) { - while (--i > 0) + while (--i >= 0) clk_disable_unprepare(ctx->clocks[i]); return ret; } diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index b5d6e3352071..3089029abba4 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -140,7 +140,7 @@ config DRM_I915_GVT_KVMGT Note that this driver only supports newer device from Broadwell on. For further information and setup guide, you can visit: - http://01.org/igvt-g. + https://github.com/intel/gvt-linux/wiki. If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e777686190ca..c13f14edb508 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) subdir-ccflags-y += $(call cc-option, -Wformat-overflow) subdir-ccflags-y += $(call cc-option, -Wformat-truncation) -subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index ac456a2275db..eda4a8b88590 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 47cd6bb04366..06900ff307b2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -246,7 +246,14 @@ static enum phy icl_aux_pw_to_phy(struct drm_i915_private *i915, enum aux_ch aux_ch = icl_aux_pw_to_ch(power_well); struct intel_digital_port *dig_port = aux_ch_to_digital_port(i915, aux_ch); - return intel_port_to_phy(i915, dig_port->base.port); + /* + * FIXME should we care about the (VBT defined) dig_port->aux_ch + * relationship or should this be purely defined by the hardware layout? + * Currently if the port doesn't appear in the VBT, or if it's declared + * as HDMI-only and routed to a combo PHY, the encoder either won't be + * present at all or it will not have an aux_ch assigned. + */ + return dig_port ? intel_port_to_phy(i915, dig_port->base.port) : PHY_NONE; } static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, @@ -414,7 +421,8 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, intel_de_rmw(dev_priv, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx)); - if (DISPLAY_VER(dev_priv) < 12) + /* FIXME this is a mess */ + if (phy != PHY_NONE) intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), 0, ICL_LANE_ENABLE_AUX); @@ -437,7 +445,10 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); - intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), ICL_LANE_ENABLE_AUX, 0); + /* FIXME this is a mess */ + if (phy != PHY_NONE) + intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), + ICL_LANE_ENABLE_AUX, 0); intel_de_rmw(dev_priv, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 3fdd8a517983..ac7fe6281afe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -609,6 +609,13 @@ struct intel_connector { * and active (i.e. dpms ON state). */ bool (*get_hw_state)(struct intel_connector *); + /* + * Optional hook called during init/resume to sync any state + * stored in the connector (eg. DSC state) wrt. the HW state. + */ + void (*sync_state)(struct intel_connector *connector, + const struct intel_crtc_state *crtc_state); + /* Panel info for eDP and LVDS */ struct intel_panel panel; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f5ef95da5534..94d2a15d8444 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2355,6 +2355,9 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, limits->min_rate = intel_dp_common_rate(intel_dp, 0); limits->max_rate = intel_dp_max_link_rate(intel_dp); + /* FIXME 128b/132b SST support missing */ + limits->max_rate = min(limits->max_rate, 810000); + limits->min_lane_count = 1; limits->max_lane_count = intel_dp_max_lane_count(intel_dp); @@ -5696,6 +5699,9 @@ intel_dp_detect(struct drm_connector *connector, goto out; } + if (!intel_dp_is_edp(intel_dp)) + intel_psr_init_dpcd(intel_dp); + intel_dp_detect_dsc_caps(intel_dp, intel_connector); intel_dp_configure_mst(intel_dp); @@ -5856,6 +5862,19 @@ intel_dp_connector_unregister(struct drm_connector *connector) intel_connector_unregister(connector); } +void intel_dp_connector_sync_state(struct intel_connector *connector, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(connector->base.dev); + + if (crtc_state && crtc_state->dsc.compression_enable) { + drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux); + connector->dp.dsc_decompression_enabled = true; + } else { + connector->dp.dsc_decompression_enabled = false; + } +} + void intel_dp_encoder_flush_work(struct drm_encoder *encoder) { struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder)); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 05db46b111f2..375d0677cd8c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -45,6 +45,8 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, int intel_dp_min_bpp(enum intel_output_format output_format); bool intel_dp_init_connector(struct intel_digital_port *dig_port, struct intel_connector *intel_connector); +void intel_dp_connector_sync_state(struct intel_connector *connector, + const struct intel_crtc_state *crtc_state); void intel_dp_set_link_params(struct intel_dp *intel_dp, int link_rate, int lane_count); int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 3a595cd433d4..8538d1ce2fcb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -330,23 +330,13 @@ static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { 0, 0 }, }; -static struct drm_dp_aux * -intel_dp_hdcp_get_aux(struct intel_connector *connector) -{ - struct intel_digital_port *dig_port = intel_attached_dig_port(connector); - - if (intel_encoder_is_mst(connector->encoder)) - return &connector->port->aux; - else - return &dig_port->dp.aux; -} - static int intel_dp_hdcp2_read_rx_status(struct intel_connector *connector, u8 *rx_status) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct drm_dp_aux *aux = intel_dp_hdcp_get_aux(connector); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; ssize_t ret; ret = drm_dp_dpcd_read(aux, @@ -399,7 +389,9 @@ intel_dp_hdcp2_wait_for_msg(struct intel_connector *connector, const struct hdcp2_dp_msg_data *hdcp2_msg_data) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; u8 msg_id = hdcp2_msg_data->msg_id; int ret, timeout; bool msg_ready = false; @@ -454,8 +446,9 @@ int intel_dp_hdcp2_write_msg(struct intel_connector *connector, unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_write, len; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; const struct hdcp2_dp_msg_data *hdcp2_msg_data; - struct drm_dp_aux *aux; hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte); if (!hdcp2_msg_data) @@ -463,8 +456,6 @@ int intel_dp_hdcp2_write_msg(struct intel_connector *connector, offset = hdcp2_msg_data->offset; - aux = intel_dp_hdcp_get_aux(connector); - /* No msg_id in DP HDCP2.2 msgs */ bytes_to_write = size - 1; byte++; @@ -490,7 +481,8 @@ static ssize_t get_receiver_id_list_rx_info(struct intel_connector *connector, u32 *dev_cnt, u8 *byte) { - struct drm_dp_aux *aux = intel_dp_hdcp_get_aux(connector); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; ssize_t ret; u8 *rx_info = byte; @@ -515,8 +507,9 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, { struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_hdcp *hdcp = &connector->hdcp; - struct drm_dp_aux *aux; + struct drm_dp_aux *aux = &dig_port->dp.aux; + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_recv, len; @@ -530,8 +523,6 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, return -EINVAL; offset = hdcp2_msg_data->offset; - aux = intel_dp_hdcp_get_aux(connector); - ret = intel_dp_hdcp2_wait_for_msg(connector, hdcp2_msg_data); if (ret < 0) return ret; @@ -561,13 +552,8 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, /* Entire msg read timeout since initiate of msg read */ if (bytes_to_recv == size - 1 && hdcp2_msg_data->msg_read_timeout > 0) { - if (intel_encoder_is_mst(connector->encoder)) - msg_end = ktime_add_ms(ktime_get_raw(), - hdcp2_msg_data->msg_read_timeout * - connector->port->parent->num_ports); - else - msg_end = ktime_add_ms(ktime_get_raw(), - hdcp2_msg_data->msg_read_timeout); + msg_end = ktime_add_ms(ktime_get_raw(), + hdcp2_msg_data->msg_read_timeout); } ret = drm_dp_dpcd_read(aux, offset, @@ -651,12 +637,11 @@ static int intel_dp_hdcp2_capable(struct intel_connector *connector, bool *capable) { - struct drm_dp_aux *aux; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; u8 rx_caps[3]; int ret; - aux = intel_dp_hdcp_get_aux(connector); - *capable = false; ret = drm_dp_dpcd_read(aux, DP_HDCP_2_2_REG_RX_CAPS_OFFSET, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 8a9432335030..a01a59f57ae5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1534,6 +1534,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo return NULL; intel_connector->get_hw_state = intel_dp_mst_get_hw_state; + intel_connector->sync_state = intel_dp_connector_sync_state; intel_connector->mst_port = intel_dp; intel_connector->port = port; drm_dp_mst_get_port_malloc(port); diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 94eece7f63be..caeca3a8442c 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -318,12 +318,6 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); - if (crtc_state->dsc.compression_enable) { - drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux); - connector->dp.dsc_decompression_enabled = true; - } else { - connector->dp.dsc_decompression_enabled = false; - } conn_state->max_bpc = (crtc_state->pipe_bpp ?: 24) / 3; } } @@ -775,8 +769,9 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) drm_connector_list_iter_begin(&i915->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { + struct intel_crtc_state *crtc_state = NULL; + if (connector->get_hw_state(connector)) { - struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; connector->base.dpms = DRM_MODE_DPMS_ON; @@ -802,6 +797,10 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; } + + if (connector->sync_state) + connector->sync_state(connector, crtc_state); + drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] hw state readout: %s\n", connector->base.base.id, connector->base.name, diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 8f702c3fc62d..4faaf4b3fc53 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1525,8 +1525,18 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * can rely on frontbuffer tracking. */ mask = EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP; + EDP_PSR_DEBUG_MASK_HPD; + + /* + * For some unknown reason on HSW non-ULT (or at least on + * Dell Latitude E6540) external displays start to flicker + * when PSR is enabled on the eDP. SR/PC6 residency is much + * higher than should be possible with an external display. + * As a workaround leave LPSP unmasked to prevent PSR entry + * when external displays are active. + */ + if (DISPLAY_VER(dev_priv) >= 8 || IS_HASWELL_ULT(dev_priv)) + mask |= EDP_PSR_DEBUG_MASK_LPSP; if (DISPLAY_VER(dev_priv) < 20) mask |= EDP_PSR_DEBUG_MASK_MAX_SLEEP; @@ -2766,9 +2776,6 @@ void intel_psr_init(struct intel_dp *intel_dp) if (!(HAS_PSR(dev_priv) || HAS_DP20(dev_priv))) return; - if (!intel_dp_is_edp(intel_dp)) - intel_psr_init_dpcd(intel_dp); - /* * HSW spec explicitly says PSR is tied to port A. * BDW+ platforms have a instance of PSR registers per transcoder but diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index acc6b6804105..2915d7afe5cc 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1209,7 +1209,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, struct intel_sdvo_tv_format format; u32 format_map; - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memset(&format, 0, sizeof(format)); memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map))); @@ -2298,7 +2298,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector) * Read the list of supported input resolutions for the selected TV * format. */ - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memcpy(&tv_res, &format_map, min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request))); @@ -2363,7 +2363,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, int i; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) { + if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) { *val = i; return 0; @@ -2419,7 +2419,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); if (property == intel_sdvo_connector->tv_format) { - state->tv.mode = intel_sdvo_connector->tv_format_supported[val]; + state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val]; if (state->crtc) { struct drm_crtc_state *crtc_state = @@ -3076,7 +3076,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, drm_property_add_enum(intel_sdvo_connector->tv_format, i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); - intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; + intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, intel_sdvo_connector->tv_format, 0); return true; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index d4386cb3569e..992a725de751 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state, static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) { - int format = conn_state->tv.mode; + int format = conn_state->tv.legacy_mode; return &tv_modes[format]; } @@ -1704,7 +1704,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - connector->state->tv.mode = i; + connector->state->tv.legacy_mode = i; } static int @@ -1859,7 +1859,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector, old_state = drm_atomic_get_old_connector_state(state, connector); new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc); - if (old_state->tv.mode != new_state->tv.mode || + if (old_state->tv.legacy_mode != new_state->tv.legacy_mode || old_state->tv.margins.left != new_state->tv.margins.left || old_state->tv.margins.right != new_state->tv.margins.right || old_state->tv.margins.top != new_state->tv.margins.top || @@ -1896,7 +1896,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) conn_state->tv.margins.right = 46; conn_state->tv.margins.bottom = 37; - conn_state->tv.mode = 0; + conn_state->tv.legacy_mode = 0; /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { @@ -1910,7 +1910,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) drm_object_attach_property(&connector->base, i915->drm.mode_config.legacy_tv_mode_property, - conn_state->tv.mode); + conn_state->tv.legacy_mode); drm_object_attach_property(&connector->base, i915->drm.mode_config.tv_left_margin_property, conn_state->tv.margins.left); diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h index 64f440fdc22b..8b21dc8e26d5 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h @@ -51,8 +51,8 @@ #define DSCC_PICTURE_PARAMETER_SET_0 _MMIO(0x6BA00) #define _DSCA_PPS_0 0x6B200 #define _DSCC_PPS_0 0x6BA00 -#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + (pps) * 4) -#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + (pps) * 4) +#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4) +#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4) #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB 0x78270 #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB 0x78370 #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC 0x78470 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1d3ebdf4069b..c08b67593565 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -379,6 +379,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj) { GEM_WARN_ON(obj->userptr.page_ref); + if (!obj->userptr.notifier.mm) + return; + mmu_interval_notifier_remove(&obj->userptr.notifier); obj->userptr.notifier.mm = NULL; } diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 90f6c1ece57d..efcb00472be2 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2849,8 +2849,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset, for (i = start; i < end; i += 4) { p = intel_gvt_find_mmio_info(gvt, i); if (p) { - WARN(1, "dup mmio definition offset %x\n", - info->offset); + WARN(1, "dup mmio definition offset %x\n", i); /* We return -EEXIST here to make GVT-g load fail. * So duplicated MMIO can be found as soon as diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index e98b6d69a91a..9b6d87c8b583 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -41,7 +41,7 @@ * To virtualize GPU resources GVT-g driver depends on hypervisor technology * e.g KVM/VFIO/mdev, Xen, etc. to provide resource access trapping capability * and be virtualized within GVT-g device module. More architectural design - * doc is available on https://01.org/group/2230/documentation-list. + * doc is available on https://github.com/intel/gvt-linux/wiki. */ static LIST_HEAD(intel_gvt_devices); diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index 2990dd4d4a0d..e14ac0ab1314 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -3,6 +3,8 @@ * Copyright © 2021 Intel Corporation */ +#include <linux/jiffies.h> + //#include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "i915_drv.h" @@ -12,7 +14,7 @@ #define REDUCED_TIMESLICE 5 #define REDUCED_PREEMPT 10 -#define WAIT_FOR_RESET_TIME 10000 +#define WAIT_FOR_RESET_TIME_MS 10000 struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt) { @@ -91,7 +93,7 @@ int intel_selftest_wait_for_rq(struct i915_request *rq) { long ret; - ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME); + ret = i915_request_wait(rq, 0, msecs_to_jiffies(WAIT_FOR_RESET_TIME_MS)); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e..3407450435e2 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c index 3f93c70488ca..311b91630fbe 100644 --- a/drivers/gpu/drm/meson/meson_encoder_dsi.c +++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c @@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_DSI]) { meson_encoder_dsi = priv->encoders[MESON_ENC_DSI]; drm_bridge_remove(&meson_encoder_dsi->bridge); - drm_bridge_remove(meson_encoder_dsi->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 25ea76558690..c4686568c9ca 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index c0bc924cd302..c9c55e2ea584 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1287,7 +1287,7 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) gpu->ubwc_config.highest_bank_bit = 15; if (adreno_is_a610(gpu)) { - gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.highest_bank_bit = 13; gpu->ubwc_config.min_acc_len = 1; gpu->ubwc_config.ubwc_mode = 1; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 83380bc92a00..6a4b489d44e5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -144,10 +144,6 @@ enum dpu_enc_rc_states { * to track crtc in the disable() hook which is called * _after_ encoder_mask is cleared. * @connector: If a mode is set, cached pointer to the active connector - * @crtc_kickoff_cb: Callback into CRTC that will flush & start - * all CTL paths - * @crtc_kickoff_cb_data: Opaque user data given to crtc_kickoff_cb - * @debugfs_root: Debug file system root file node * @enc_lock: Lock around physical encoder * create/destroy/enable/disable * @frame_busy_mask: Bitmask tracking which phys_enc we are still @@ -2072,7 +2068,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } /* reset the merge 3D HW block */ - if (phys_enc->hw_pp->merge_3d) { + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, BLEND_3D_NONE); if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d) @@ -2103,7 +2099,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) if (phys_enc->hw_wb) intf_cfg.wb = phys_enc->hw_wb->idx; - if (phys_enc->hw_pp->merge_3d) + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; if (ctl->ops.reset_intf_cfg) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index b58a9c2ae326..724537ab776d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -29,7 +29,6 @@ static inline bool reserved_by_other(uint32_t *res_map, int idx, /** * struct dpu_rm_requirements - Reservation requirements parameter bundle * @topology: selected topology for the display - * @hw_res: Hardware resources required as reported by the encoders */ struct dpu_rm_requirements { struct msm_display_topology topology; @@ -204,6 +203,8 @@ static bool _dpu_rm_needs_split_display(const struct msm_display_topology *top) * _dpu_rm_get_lm_peer - get the id of a mixer which is a peer of the primary * @rm: dpu resource manager handle * @primary_idx: index of primary mixer in rm->mixer_blks[] + * + * Returns: lm peer mixed id on success or %-EINVAL on error */ static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx) { diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 77a8d9366ed7..fb588fde298a 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -135,11 +135,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) tbd = dp_link_get_test_bits_depth(ctrl->link, ctrl->panel->dp_mode.bpp); - if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) { - pr_debug("BIT_DEPTH not set. Configure default\n"); - tbd = DP_TEST_BIT_DEPTH_8; - } - config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT; /* Num of Lanes */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index d37d599aec27..4c72124ffb5d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; +static void dp_display_send_hpd_event(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + struct drm_connector *connector; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + connector = dp->dp_display.connector; + drm_helper_hpd_irq_event(connector->dev); +} + static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - struct drm_bridge *bridge = dp->dp_display.bridge; + if ((hpd && dp->dp_display.link_ready) || + (!hpd && !dp->dp_display.link_ready)) { + drm_dbg_dp(dp->drm_dev, "HPD already %s\n", + (hpd ? "on" : "off")); + return 0; + } /* reset video pattern flag on disconnect */ if (!hpd) { @@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); + dp_display_send_hpd_event(&dp->dp_display); return 0; } diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 98427d45e9a7..49dfac1fd1ef 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -7,6 +7,7 @@ #include <drm/drm_print.h> +#include "dp_reg.h" #include "dp_link.h" #include "dp_panel.h" @@ -1082,7 +1083,7 @@ int dp_link_process_request(struct dp_link *dp_link) int dp_link_get_colorimetry_config(struct dp_link *dp_link) { - u32 cc; + u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB; struct dp_link_private *link; if (!dp_link) { @@ -1096,10 +1097,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link) * Unless a video pattern CTS test is ongoing, use RGB_VESA * Only RGB_VESA and RGB_CEA supported for now */ - if (dp_link_is_video_pattern_requested(link)) - cc = link->dp_link.test_video.test_dyn_range; - else - cc = DP_TEST_DYNAMIC_RANGE_VESA; + if (dp_link_is_video_pattern_requested(link)) { + if (link->dp_link.test_video.test_dyn_range & + DP_TEST_DYNAMIC_RANGE_CEA) + cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB; + } return cc; } @@ -1179,6 +1181,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) { u32 tbd; + struct dp_link_private *link; + + link = container_of(dp_link, struct dp_link_private, dp_link); /* * Few simplistic rules and assumptions made here: @@ -1196,12 +1201,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) tbd = DP_TEST_BIT_DEPTH_10; break; default: - tbd = DP_TEST_BIT_DEPTH_UNKNOWN; + drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n", + bpp); + tbd = DP_TEST_BIT_DEPTH_8; break; } - if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN) - tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); + tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); return tbd; } diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index ea85a691e72b..78785ed4b40c 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -143,6 +143,9 @@ #define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001) #define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005) +#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0) +#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04) + #define REG_DP_VALID_BOUNDARY (0x00000030) #define REG_DP_VALID_BOUNDARY_2 (0x00000034) diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 5f68e31a3e4e..0915f3b68752 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -26,7 +26,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) { void *vaddr; - vaddr = msm_gem_get_vaddr(obj); + vaddr = msm_gem_get_vaddr_locked(obj); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); iosys_map_set_vaddr(map, vaddr); @@ -36,7 +36,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { - msm_gem_put_vaddr(obj); + msm_gem_put_vaddr_locked(obj); } struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 095390774f22..655002b21b0d 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -751,12 +751,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned long flags; - pm_runtime_get_sync(&gpu->pdev->dev); + WARN_ON(!mutex_is_locked(&gpu->lock)); - mutex_lock(&gpu->lock); + pm_runtime_get_sync(&gpu->pdev->dev); msm_gpu_hw_init(gpu); + submit->seqno = submit->hw_fence->seqno; + update_sw_cntrs(gpu); /* @@ -781,11 +783,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) gpu->funcs->submit(gpu, submit); gpu->cur_ctx_seqno = submit->queue->ctx->seqno; - hangcheck_timer_reset(gpu); - - mutex_unlock(&gpu->lock); - pm_runtime_put(&gpu->pdev->dev); + hangcheck_timer_reset(gpu); } /* diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 5cc8d358cc97..d5512037c38b 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -201,11 +203,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -213,7 +237,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -279,6 +303,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 455b2e3a0cdd..35423d10aafa 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -562,6 +562,7 @@ static const struct msm_mdss_data sdm670_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .highest_bank_bit = 1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sdm845_data = { diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 4bc13f7d005a..9d6655f96f0c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_fence_init(submit->hw_fence, fctx); - submit->seqno = submit->hw_fence->seqno; - mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { @@ -35,8 +33,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) mutex_unlock(&priv->lru.lock); + /* TODO move submit path over to using a per-ring lock.. */ + mutex_lock(&gpu->lock); + msm_gpu_submit(gpu, submit); + mutex_unlock(&gpu->lock); + return dma_fence_get(submit->hw_fence); } diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 1e6aaf95ff7c..ceef470c9fbf 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM help Say Y here if you want to enable experimental support for Shared Virtual Memory (SVM). + +config DRM_NOUVEAU_GSP_DEFAULT + bool "Use GSP firmware for Turing/Ampere (needs firmware installed)" + depends on DRM_NOUVEAU + default n + help + Say Y here if you want to use the GSP codepaths by default on + Turing and Ampere GPUs. diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h index 0d9fc741a719..932c9fd0b2d8 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h @@ -11,6 +11,7 @@ struct nvkm_client { u32 debug; struct rb_root objroot; + spinlock_t obj_lock; void *data; int (*event)(u64 token, void *argv, u32 argc); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index d1437c08645f..6f5d376d8fcc 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -9,7 +9,7 @@ #define GSP_PAGE_SIZE BIT(GSP_PAGE_SHIFT) struct nvkm_gsp_mem { - u32 size; + size_t size; void *data; dma_addr_t addr; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index a04156ca8390..80f74ee0fc78 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, struct nouveau_abi16_ntfy *ntfy, *temp; /* Cancel all jobs from the entity's queue. */ - drm_sched_entity_fini(&chan->sched.entity); + if (chan->sched) + drm_sched_entity_fini(&chan->sched->entity); if (chan->chan) nouveau_channel_idle(chan->chan); - nouveau_sched_fini(&chan->sched); + if (chan->sched) + nouveau_sched_destroy(&chan->sched); /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { @@ -197,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_drm *drm = nouveau_drm(dev); struct nvif_device *device = &drm->client.device; + struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device); struct nvkm_gr *gr = nvxx_gr(device); struct drm_nouveau_getparam *getparam = data; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -261,6 +264,14 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = nouveau_exec_push_max_from_ib_max(ib_max); break; } + case NOUVEAU_GETPARAM_VRAM_BAR_SIZE: + getparam->value = nvkm_device->func->resource_size(nvkm_device, 1); + break; + case NOUVEAU_GETPARAM_VRAM_USED: { + struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); + break; + } default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; @@ -337,10 +348,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq, - chan->chan->dma.ib_max); - if (ret) - goto done; + /* If we're not using the VM_BIND uAPI, we don't need a scheduler. + * + * The client lock is already acquired by nouveau_abi16_get(). + */ + if (nouveau_cli_uvmm(cli)) { + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); + if (ret) + goto done; + } init->channel = chan->chan->chid; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 1f5e243c0c75..11c8c4a80079 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { struct nouveau_bo *ntfy; struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; - struct nouveau_sched sched; + struct nouveau_sched *sched; }; struct nouveau_abi16 { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 6f6c31a9937b..a947e1d5f309 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); - nouveau_sched_fini(&cli->sched); + if (cli->sched) + nouveau_sched_destroy(&cli->sched); if (uvmm) nouveau_uvmm_fini(uvmm); nouveau_vmm_fini(&cli->svm); @@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, cli->mem = &mems[ret]; /* Don't pass in the (shared) sched_wq in order to let - * nouveau_sched_init() create a dedicated one for VM_BIND jobs. + * nouveau_sched_create() create a dedicated one for VM_BIND jobs. * * This is required to ensure that for VM_BIND jobs free_job() work and * run_job() work can always run concurrently and hence, free_job() work @@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, * locks which indirectly or directly are held for allocations * elsewhere. */ - ret = nouveau_sched_init(&cli->sched, drm, NULL, 1); + ret = nouveau_sched_create(&cli->sched, drm, NULL, 1); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8a6d94c8b163..e239c6bf4afa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -98,7 +98,7 @@ struct nouveau_cli { bool disabled; } uvmm; - struct nouveau_sched sched; + struct nouveau_sched *sched; const struct nvif_mclass *mem; diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index bc5d71b79ab2..e65c0ef23bc7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (ret) goto out; - args.sched = &chan16->sched; + args.sched = chan16->sched; args.file_priv = file_priv; args.chan = chan; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 5057d976fa57..93f08f9479d8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence) if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - if (atomic_dec_and_test(&fctx->notify_ref)) + if (!--fctx->notify_ref) drop = 1; } @@ -103,7 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { - cancel_work_sync(&fctx->allow_block_work); + cancel_work_sync(&fctx->uevent_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -146,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc return drop; } -static int -nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc) +static void +nouveau_fence_uevent_work(struct work_struct *work) { - struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event); + struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, + uevent_work); unsigned long flags; - int ret = NVIF_EVENT_KEEP; + int drop = 0; spin_lock_irqsave(&fctx->lock, flags); if (!list_empty(&fctx->pending)) { @@ -161,23 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); if (nouveau_fence_update(chan, fctx)) - ret = NVIF_EVENT_DROP; + drop = 1; } - spin_unlock_irqrestore(&fctx->lock, flags); + if (drop) + nvif_event_block(&fctx->event); - return ret; + spin_unlock_irqrestore(&fctx->lock, flags); } -static void -nouveau_fence_work_allow_block(struct work_struct *work) +static int +nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc) { - struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, - allow_block_work); - - if (atomic_read(&fctx->notify_ref) == 0) - nvif_event_block(&fctx->event); - else - nvif_event_allow(&fctx->event); + struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event); + schedule_work(&fctx->uevent_work); + return NVIF_EVENT_KEEP; } void @@ -191,7 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; - INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); + INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -535,19 +533,15 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f) struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; - bool do_work; - if (atomic_inc_return(&fctx->notify_ref) == 0) - do_work = true; + if (!fctx->notify_ref++) + nvif_event_allow(&fctx->event); ret = nouveau_fence_no_signaling(f); if (ret) set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); - else if (atomic_dec_and_test(&fctx->notify_ref)) - do_work = true; - - if (do_work) - schedule_work(&fctx->allow_block_work); + else if (!--fctx->notify_ref) + nvif_event_block(&fctx->event); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 28f5cf013b89..8bc065acfe35 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -3,7 +3,6 @@ #define __NOUVEAU_FENCE_H__ #include <linux/dma-fence.h> -#include <linux/workqueue.h> #include <nvif/event.h> struct nouveau_drm; @@ -45,10 +44,9 @@ struct nouveau_fence_chan { u32 context; char name[32]; + struct work_struct uevent_work; struct nvif_event event; - struct work_struct allow_block_work; - atomic_t notify_ref; - int dead, killed; + int notify_ref, dead, killed; }; struct nouveau_fence_priv { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 49c2bcbef129..5a887d67dc0e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -764,7 +764,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, return -ENOMEM; if (unlikely(nouveau_cli_uvmm(cli))) - return -ENOSYS; + return nouveau_abi16_put(abi16, -ENOSYS); list_for_each_entry(temp, &abi16->channels, head) { if (temp->chan->chid == req->channel) { diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index dd98f6910f9c..32fa2e273965 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = { .free_job = nouveau_sched_free_job, }; -int +static int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, struct workqueue_struct *wq, u32 credit_limit) { @@ -453,7 +453,30 @@ fail_wq: return ret; } -void +int +nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) +{ + struct nouveau_sched *sched; + int ret; + + sched = kzalloc(sizeof(*sched), GFP_KERNEL); + if (!sched) + return -ENOMEM; + + ret = nouveau_sched_init(sched, drm, wq, credit_limit); + if (ret) { + kfree(sched); + return ret; + } + + *psched = sched; + + return 0; +} + + +static void nouveau_sched_fini(struct nouveau_sched *sched) { struct drm_gpu_scheduler *drm_sched = &sched->base; @@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched) if (sched->wq) destroy_workqueue(sched->wq); } + +void +nouveau_sched_destroy(struct nouveau_sched **psched) +{ + struct nouveau_sched *sched = *psched; + + nouveau_sched_fini(sched); + kfree(sched); + + *psched = NULL; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index a6528f5981e6..e1f01a23e6f6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -111,8 +111,8 @@ struct nouveau_sched { } job; }; -int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, - struct workqueue_struct *wq, u32 credit_limit); -void nouveau_sched_fini(struct nouveau_sched *sched); +int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit); +void nouveau_sched_destroy(struct nouveau_sched **psched); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index cc03e0c22ff3..5e4565c5011a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 4f223c972c6a..0a0a11dc9ec0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, if (ret) return ret; - args.sched = &cli->sched; + args.sched = cli->sched; args.file_priv = file_priv; ret = nouveau_uvmm_vm_bind(&args); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c index ebdeb8eb9e77..c55662937ab2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/client.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c @@ -180,6 +180,7 @@ nvkm_client_new(const char *name, u64 device, const char *cfg, const char *dbg, client->device = device; client->debug = nvkm_dbgopt(dbg, "CLIENT"); client->objroot = RB_ROOT; + spin_lock_init(&client->obj_lock); client->event = event; INIT_LIST_HEAD(&client->umem); spin_lock_init(&client->lock); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c index 7c554c14e884..aea3ba72027a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/object.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c @@ -30,8 +30,10 @@ nvkm_object_search(struct nvkm_client *client, u64 handle, const struct nvkm_object_func *func) { struct nvkm_object *object; + unsigned long flags; if (handle) { + spin_lock_irqsave(&client->obj_lock, flags); struct rb_node *node = client->objroot.rb_node; while (node) { object = rb_entry(node, typeof(*object), node); @@ -40,9 +42,12 @@ nvkm_object_search(struct nvkm_client *client, u64 handle, else if (handle > object->object) node = node->rb_right; - else + else { + spin_unlock_irqrestore(&client->obj_lock, flags); goto done; + } } + spin_unlock_irqrestore(&client->obj_lock, flags); return ERR_PTR(-ENOENT); } else { object = &client->object; @@ -57,30 +62,39 @@ done: void nvkm_object_remove(struct nvkm_object *object) { + unsigned long flags; + + spin_lock_irqsave(&object->client->obj_lock, flags); if (!RB_EMPTY_NODE(&object->node)) rb_erase(&object->node, &object->client->objroot); + spin_unlock_irqrestore(&object->client->obj_lock, flags); } bool nvkm_object_insert(struct nvkm_object *object) { - struct rb_node **ptr = &object->client->objroot.rb_node; + struct rb_node **ptr; struct rb_node *parent = NULL; + unsigned long flags; + spin_lock_irqsave(&object->client->obj_lock, flags); + ptr = &object->client->objroot.rb_node; while (*ptr) { struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node); parent = *ptr; - if (object->object < this->object) + if (object->object < this->object) { ptr = &parent->rb_left; - else - if (object->object > this->object) + } else if (object->object > this->object) { ptr = &parent->rb_right; - else + } else { + spin_unlock_irqrestore(&object->client->obj_lock, flags); return false; + } } rb_link_node(&object->node, parent, ptr); rb_insert_color(&object->node, &object->client->objroot); + spin_unlock_irqrestore(&object->client->obj_lock, flags); return true; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c index 4135690326f4..3a30bea30e36 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c @@ -168,12 +168,11 @@ r535_bar_new_(const struct nvkm_bar_func *hw, struct nvkm_device *device, rm->flush = r535_bar_flush; ret = gf100_bar_new_(rm, device, type, inst, &bar); - *pbar = bar; if (ret) { - if (!bar) - kfree(rm); + kfree(rm); return ret; } + *pbar = bar; bar->flushBAR2PhysMode = ioremap(device->func->resource_addr(device, 3), PAGE_SIZE); if (!bar->flushBAR2PhysMode) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fc..8c2bf1c16f2a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 9ee58e2a0eb2..a73a5b589790 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -997,6 +997,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp) return 0; } +static void +nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) +{ + if (mem->data) { + /* + * Poison the buffer to catch any unexpected access from + * GSP-RM if the buffer was prematurely freed. + */ + memset(mem->data, 0xFF, mem->size); + + dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); + memset(mem, 0, sizeof(*mem)); + } +} + +static int +nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem) +{ + mem->size = size; + mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); + if (WARN_ON(!mem->data)) + return -ENOMEM; + + return 0; +} + static int r535_gsp_postinit(struct nvkm_gsp *gsp) { @@ -1024,6 +1050,11 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) nvkm_inth_allow(&gsp->subdev.inth); nvkm_wr32(device, 0x110004, 0x00000040); + + /* Release the DMA buffers that were needed only for boot and init */ + nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); + nvkm_gsp_mem_dtor(gsp, &gsp->libos); + return ret; } @@ -1078,7 +1109,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) if (IS_ERR(rpc)) return PTR_ERR(rpc); - rpc->size = sizeof(*rpc); rpc->numEntries = NV_GSP_REG_NUM_ENTRIES; str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]); @@ -1094,6 +1124,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) strings += name_len; str_offset += name_len; } + rpc->size = str_offset; return nvkm_gsp_rpc_wr(gsp, rpc, false); } @@ -1532,27 +1563,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) return 0; } -static void -nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) -{ - if (mem->data) { - dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); - mem->data = NULL; - } -} - -static int -nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem) -{ - mem->size = size; - mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); - if (WARN_ON(!mem->data)) - return -ENOMEM; - - return 0; -} - - static int r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1) { @@ -1938,20 +1948,20 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) * See kgspCreateRadix3_IMPL */ static int -nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size, +nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) { u64 addr; for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) { u64 *ptes; - int idx; + size_t bufsize; + int ret, idx; - rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE); - rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size, - &rx3->mem[i].addr, GFP_KERNEL); - if (WARN_ON(!rx3->mem[i].data)) - return -ENOMEM; + bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE); + ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]); + if (ret) + return ret; ptes = rx3->mem[i].data; if (i == 2) { @@ -1991,7 +2001,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) if (ret) return ret; - ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3); + ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3); if (ret) return ret; @@ -2150,6 +2160,13 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) mutex_destroy(&gsp->cmdq.mutex); r535_gsp_dtor_fws(gsp); + + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); + nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); + nvkm_gsp_mem_dtor(gsp, &gsp->loginit); + nvkm_gsp_mem_dtor(gsp, &gsp->logintr); + nvkm_gsp_mem_dtor(gsp, &gsp->logrm); } int @@ -2194,7 +2211,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp) memcpy(gsp->sig.data, data, size); /* Build radix3 page table for ELF image. */ - ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3); + ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3); if (ret) return ret; @@ -2295,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif) { struct nvkm_subdev *subdev = &gsp->subdev; int ret; + bool enable_gsp = fwif->enable; - if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable)) +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT) + enable_gsp = true; +#endif + if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp)) return -EINVAL; if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) || diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index dad938cf6dec..8f3783742208 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -539,6 +539,8 @@ config DRM_PANEL_RAYDIUM_RM692E5 depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index c4c0f08e9202..4945a1e787eb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1768,11 +1768,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = { }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { - .clock = 162850, + .clock = 162680, .hdisplay = 1200, - .hsync_start = 1200 + 50, - .hsync_end = 1200 + 50 + 20, - .htotal = 1200 + 50 + 20 + 50, + .hsync_start = 1200 + 60, + .hsync_end = 1200 + 60 + 20, + .htotal = 1200 + 60 + 20 + 40, .vdisplay = 1920, .vsync_start = 1920 + 116, .vsync_end = 1920 + 116 + 8, diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c index ea5a85779382..f23d8832a1ad 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c @@ -309,7 +309,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = { .off_func = s6d7aa0_lsl080al02_off, .drm_mode = &s6d7aa0_lsl080al02_mode, .mode_flags = MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP, - .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .bus_flags = 0, .has_backlight = false, .use_passwd3 = false, diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 2214cb09678c..d493ee735c73 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3948,6 +3948,7 @@ static const struct panel_desc tianma_tm070jdhg30 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct panel_desc tianma_tm070jvhg33 = { @@ -3960,6 +3961,7 @@ static const struct panel_desc tianma_tm070jvhg33 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct display_timing tianma_tm070rvhg71_timing = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 85b3b4871a1d..fdd768bbd487 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1985,8 +1985,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); } - if (!clock) + if (!clock) { + vop2_unlock(vop2); return; + } if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 550492a7a031..d442b893275b 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1184,14 +1184,16 @@ static void drm_sched_run_job_work(struct work_struct *w) if (READ_ONCE(sched->pause_submit)) return; + /* Find entity with a ready job */ entity = drm_sched_select_entity(sched); if (!entity) - return; + return; /* No more work */ sched_job = drm_sched_entity_pop_job(entity); if (!sched_job) { complete_all(&entity->entity_idle); - return; /* No more work */ + drm_sched_run_job_queue(sched); + return; } s_fence = sched_job->s_fence; diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ff36171c8fb7..03d1c76aec2d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -960,7 +960,8 @@ int host1x_client_iommu_attach(struct host1x_client *client) * not the shared IOMMU domain, don't try to attach it to a different * domain. This allows using the IOMMU-backed DMA API. */ - if (domain && domain != tegra->domain) + if (domain && domain->type != IOMMU_DOMAIN_IDENTITY && + domain != tegra->domain) return 0; if (tegra->domain) { @@ -1242,9 +1243,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = drm_dev_register(drm, 0); if (err < 0) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index ea2af6bd9abe..e48863a44556 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -8,16 +8,308 @@ #include <linux/prime_numbers.h> #include <linux/sched/signal.h> +#include <linux/sizes.h> #include <drm/drm_buddy.h> #include "../lib/drm_random.h" +static unsigned int random_seed; + static inline u64 get_size(int order, u64 chunk_size) { return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_range_bias(struct kunit *test) +{ + u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem; + DRM_RND_STATE(prng, random_seed); + unsigned int i, count, *order; + struct drm_buddy mm; + LIST_HEAD(allocated); + + bias_size = SZ_1M; + ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); + ps = max(SZ_4K, ps); + mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ + + kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + count = mm_size / bias_size; + order = drm_random_order(count, &prng); + KUNIT_EXPECT_TRUE(test, order); + + /* + * Idea is to split the address space into uniform bias ranges, and then + * in some random order allocate within each bias, using various + * patterns within. This should detect if allocations leak out from a + * given bias, for example. + */ + + for (i = 0; i < count; i++) { + LIST_HEAD(tmp); + u32 size; + + bias_start = order[i] * bias_size; + bias_end = bias_start + bias_size; + bias_rem = bias_size; + + /* internal round_up too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, bias_size, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + + /* size too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size + ps, ps); + + /* bias range too small for size */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end, bias_size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end, bias_size, ps); + + /* bias misaligned */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end - ps, + bias_size >> 1, bias_size >> 1, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); + + /* single big page */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* single page with internal round_up */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, ps, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, ps, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* random size within */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + if (size) + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + bias_rem -= size; + /* too big for current avail */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_rem + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_rem + ps, ps); + + if (bias_rem) { + /* random fill of the remainder */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + size = max(size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + /* + * Intentionally allow some space to be left + * unallocated, and ideally not always on the bias + * boundaries. + */ + drm_buddy_free_list(&mm, &tmp); + } else { + list_splice_tail(&tmp, &allocated); + } + } + + kfree(order); + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); + + /* + * Something more free-form. Idea is to pick a random starting bias + * range within the address space and then start filling it up. Also + * randomly grow the bias range in both directions as we go along. This + * should give us bias start/end which is not always uniform like above, + * and in some cases will require the allocator to jump over already + * allocated nodes in the middle of the address space. + */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + do { + u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + bias_rem -= size; + + /* + * Try to randomly grow the bias range in both directions, or + * only one, or perhaps don't grow at all. + */ + do { + u32 old_bias_start = bias_start; + u32 old_bias_end = bias_end; + + if (bias_start) + bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); + if (bias_end != mm_size) + bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); + + bias_rem += old_bias_start - bias_start; + bias_rem += bias_end - old_bias_end; + } while (!bias_rem && (bias_start || bias_end != mm_size)); + } while (bias_rem); + + KUNIT_ASSERT_EQ(test, bias_start, 0); + KUNIT_ASSERT_EQ(test, bias_end, mm_size); + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc passed with bias(%x-%x), size=%u\n", + bias_start, bias_end, ps); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_contiguous(struct kunit *test) +{ + const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; + unsigned long i, n_pages, total; + struct drm_buddy_block *block; + struct drm_buddy mm; + LIST_HEAD(left); + LIST_HEAD(middle); + LIST_HEAD(right); + LIST_HEAD(allocated); + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + + /* + * Idea is to fragment the address space by alternating block + * allocations between three different lists; one for left, middle and + * right. We can then free a list to simulate fragmentation. In + * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION, + * including the try_harder path. + */ + + i = 0; + n_pages = mm_size / ps; + do { + struct list_head *list; + int slot = i % 3; + + if (slot == 0) + list = &left; + else if (slot == 1) + list = &middle; + else + list = &right; + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, 0, mm_size, + ps, ps, list, 0), + "buddy_alloc hit an error size=%lu\n", + ps); + } while (++i < n_pages); + + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 3 * ps); + + drm_buddy_free_list(&mm, &middle); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 3 * ps); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 2 * ps); + + drm_buddy_free_list(&mm, &right); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 3 * ps); + /* + * At this point we should have enough contiguous space for 2 blocks, + * however they are never buddies (since we freed middle and right) so + * will require the try_harder logic to find them. + */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 2 * ps); + + drm_buddy_free_list(&mm, &left); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 3 * ps); + + total = 0; + list_for_each_entry(block, &allocated, link) + total += drm_buddy_block_size(&mm, block); + + KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_pathological(struct kunit *test) { u64 mm_size, size, start = 0; @@ -275,16 +567,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_fini(&mm); } +static int drm_buddy_suite_init(struct kunit_suite *suite) +{ + while (!random_seed) + random_seed = get_random_u32(); + + kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", + random_seed); + + return 0; +} + static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_limit), KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), + KUNIT_CASE(drm_test_buddy_alloc_contiguous), + KUNIT_CASE(drm_test_buddy_alloc_range_bias), {} }; static struct kunit_suite drm_buddy_test_suite = { .name = "drm_buddy", + .suite_init = drm_buddy_suite_init, .test_cases = drm_buddy_tests, }; diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 4e9247cf9977..f37c0d765865 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -157,7 +157,7 @@ static void drm_test_mm_init(struct kunit *test) /* After creation, it should all be one massive hole */ if (!assert_one_hole(test, &mm, 0, size)) { - KUNIT_FAIL(test, ""); + KUNIT_FAIL(test, "mm not one hole on creation"); goto out; } @@ -171,14 +171,14 @@ static void drm_test_mm_init(struct kunit *test) /* After filling the range entirely, there should be no holes */ if (!assert_no_holes(test, &mm)) { - KUNIT_FAIL(test, ""); + KUNIT_FAIL(test, "mm has holes when filled"); goto out; } /* And then after emptying it again, the massive hole should be back */ drm_mm_remove_node(&tmp); if (!assert_one_hole(test, &mm, 0, size)) { - KUNIT_FAIL(test, ""); + KUNIT_FAIL(test, "mm does not have single hole after emptying"); goto out; } @@ -188,13 +188,13 @@ out: static void drm_test_mm_debug(struct kunit *test) { + struct drm_printer p = drm_debug_printer(test->name); struct drm_mm mm; struct drm_mm_node nodes[2]; /* Create a small drm_mm with a couple of nodes and a few holes, and * check that the debug iterator doesn't explode over a trivial drm_mm. */ - drm_mm_init(&mm, 0, 4096); memset(nodes, 0, sizeof(nodes)); @@ -209,6 +209,9 @@ static void drm_test_mm_debug(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_mm_reserve_node(&mm, &nodes[1]), "failed to reserve node[0] {start=%lld, size=%lld)\n", nodes[0].start, nodes[0].size); + + drm_mm_print(&mm, &p); + KUNIT_SUCCEED(test); } static bool expect_insert(struct kunit *test, struct drm_mm *mm, diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index f5187b384ae9..76027960054f 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -95,11 +95,17 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages); ttm_tt_mgr_init(num_pages, num_dma32); - glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32 | + __GFP_NOWARN); + /* Retry without GFP_DMA32 for platforms DMA32 is not available */ if (unlikely(glob->dummy_read_page == NULL)) { - ret = -ENOMEM; - goto out; + glob->dummy_read_page = alloc_page(__GFP_ZERO); + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out; + } + pr_warn("Using GFP_DMA32 fallback for dummy_read_page\n"); } INIT_LIST_HEAD(&glob->device_list); @@ -195,7 +201,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func bool use_dma_alloc, bool use_dma32) { struct ttm_global *glob = &ttm_glob; - int ret; + int ret, nid; if (WARN_ON(vma_manager == NULL)) return -EINVAL; @@ -215,7 +221,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func ttm_sys_man_init(bdev); - ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32); + if (dev) + nid = dev_to_node(dev); + else + nid = NUMA_NO_NODE; + + ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32); bdev->vma_manager = vma_manager; spin_lock_init(&bdev->lru_lock); diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index b62f420a9f96..112438d965ff 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -387,7 +387,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index fcff41dd2315..88f63d526b22 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -147,6 +147,13 @@ v3d_job_allocate(void **container, size_t size) return 0; } +static void +v3d_job_deallocate(void **container) +{ + kfree(*container); + *container = NULL; +} + static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), @@ -273,8 +280,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, ret = v3d_job_init(v3d, file_priv, &(*job)->base, v3d_job_free, args->in_sync, se, V3D_CSD); - if (ret) + if (ret) { + v3d_job_deallocate((void *)job); return ret; + } ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job)); if (ret) @@ -282,8 +291,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, ret = v3d_job_init(v3d, file_priv, *clean_job, v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)clean_job); return ret; + } (*job)->args = *args; @@ -860,8 +871,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &render->base, v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&render); goto fail; + } render->start = args->rcl_start; render->end = args->rcl_end; @@ -874,8 +887,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &bin->base, v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&bin); goto fail; + } bin->start = args->bcl_start; bin->end = args->bcl_end; @@ -892,8 +907,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&clean_job); goto fail; + } last_job = clean_job; } else { @@ -1015,8 +1032,10 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &job->base, v3d_job_free, args->in_sync, &se, V3D_TFU); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&job); goto fail; + } job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), sizeof(*job->base.bo), GFP_KERNEL); @@ -1233,8 +1252,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &cpu_job->base, v3d_job_free, 0, &se, V3D_CPU); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&cpu_job); goto fail; + } clean_job = cpu_job->indirect_csd.clean_job; csd_job = cpu_job->indirect_csd.job; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index f8e9abe647b9..9539aa28937f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 3062e0e0d467..79ba98a169f9 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -50,8 +50,8 @@ #define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) #define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffffu << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffffu << 0) #define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn #define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index 811add10c30d..c165e26c0976 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -242,8 +242,8 @@ struct slpc_shared_data { (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) #define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) -#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xffu << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0) #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn #endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index 3b83f907ece4..0b1146d0c997 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -82,11 +82,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); #define GUC_CTB_HDR_LEN 1u #define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN #define GUC_CTB_MSG_MAX_LEN 256u -#define GUC_CTB_MSG_0_FENCE (0xffff << 16) -#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_MSG_0_FENCE (0xffffu << 16) +#define GUC_CTB_MSG_0_FORMAT (0xfu << 12) #define GUC_CTB_FORMAT_HXG 0u -#define GUC_CTB_MSG_0_RESERVED (0xf << 8) -#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) +#define GUC_CTB_MSG_0_RESERVED (0xfu << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xffu << 0) /** * DOC: CTB HXG Message diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 47094b9b044c..0400bc0fccdc 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -31,9 +31,9 @@ */ #define GUC_KLV_LEN_MIN 1u -#define GUC_KLV_0_KEY (0xffff << 16) -#define GUC_KLV_0_LEN (0xffff << 0) -#define GUC_KLV_n_VALUE (0xffffffff << 0) +#define GUC_KLV_0_KEY (0xffffu << 16) +#define GUC_KLV_0_LEN (0xffffu << 0) +#define GUC_KLV_n_VALUE (0xffffffffu << 0) /** * DOC: GuC Self Config KLVs diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index 3d199016cf88..29e414c82d56 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -40,18 +40,18 @@ */ #define GUC_HXG_MSG_MIN_LEN 1u -#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_MSG_0_ORIGIN (0x1u << 31) #define GUC_HXG_ORIGIN_HOST 0u #define GUC_HXG_ORIGIN_GUC 1u -#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_MSG_0_TYPE (0x7u << 28) #define GUC_HXG_TYPE_REQUEST 0u #define GUC_HXG_TYPE_EVENT 1u #define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u #define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u #define GUC_HXG_TYPE_RESPONSE_FAILURE 6u #define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u -#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) -#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) +#define GUC_HXG_MSG_0_AUX (0xfffffffu << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffffu << 0) /** * DOC: HXG Request @@ -85,8 +85,8 @@ */ #define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) -#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfffu << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** @@ -117,8 +117,8 @@ */ #define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) -#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfffu << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** @@ -188,8 +188,8 @@ */ #define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) -#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfffu << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffffu << 0) /** * DOC: HXG Response diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 5f19550cc845..777c20ceabab 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -10,7 +10,7 @@ #include "xe_bo.h" -#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT) +#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */ static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n) { @@ -35,12 +35,10 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, u32 ofs, u64 *ptr, u32 size) { struct ttm_bo_kmap_obj map; - void *virtual; + void *src; bool is_iomem; int ret; - XE_WARN_ON(size != 8); - ret = xe_bo_lock(bo, true); if (ret) return ret; @@ -50,11 +48,12 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, goto out_unlock; ofs &= ~PAGE_MASK; - virtual = ttm_kmap_obj_virtual(&map, &is_iomem); + src = ttm_kmap_obj_virtual(&map, &is_iomem); + src += ofs; if (is_iomem) - *ptr = readq((void __iomem *)(virtual + ofs)); + memcpy_fromio(ptr, (void __iomem *)src, size); else - *ptr = *(u64 *)(virtual + ofs); + memcpy(ptr, src, size); ttm_bo_kunmap(&map); out_unlock: diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index a6523df0f1d3..c347e2c29f81 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -114,21 +114,21 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, region | XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(remote)) { - KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n", - str, PTR_ERR(remote)); + KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", + str, remote); return; } err = xe_bo_validate(remote, NULL, false); if (err) { - KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n", + KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", str, err); goto out_unlock; } err = xe_bo_vmap(remote); if (err) { - KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n", + KUNIT_FAIL(test, "Failed to vmap system bo for %s: %i\n", str, err); goto out_unlock; } diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c index ef56bd517b28..421b819fd4ba 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -21,4 +21,5 @@ kunit_test_suite(xe_mocs_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_mocs kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index a53c22a19582..b4715b78ef3b 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -74,9 +74,6 @@ static const struct platform_test_case cases[] = { SUBPLATFORM_CASE(DG2, G11, B1), SUBPLATFORM_CASE(DG2, G12, A0), SUBPLATFORM_CASE(DG2, G12, A1), - PLATFORM_CASE(PVC, B0), - PLATFORM_CASE(PVC, B1), - PLATFORM_CASE(PVC, C0), GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0b0e262e2166..4d3b80ec906d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -28,6 +28,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", + [XE_PL_TT] = "gtt", + [XE_PL_VRAM0] = "vram0", + [XE_PL_VRAM1] = "vram1", + [XE_PL_STOLEN] = "stolen" +}; + static const struct ttm_place sys_placement_flags = { .fpfn = 0, .lpfn = 0, @@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - - trace_xe_bo_move(bo); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9b1279aca127..8be42ac6cd07 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); extern struct ttm_device_funcs xe_ttm_funcs; +extern const char *const xe_mem_type_to_name[]; int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b8d8da546670..5176c27e4b6a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -83,9 +83,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) return 0; } -static void device_kill_persistent_exec_queues(struct xe_device *xe, - struct xe_file *xef); - static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -102,8 +99,6 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) mutex_unlock(&xef->exec_queue.lock); xa_destroy(&xef->exec_queue.xa); mutex_destroy(&xef->exec_queue.lock); - device_kill_persistent_exec_queues(xe, xef); - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); @@ -255,9 +250,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xa_erase(&xe->usm.asid_to_vm, asid); } - drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); - INIT_LIST_HEAD(&xe->persistent_engines.list); - spin_lock_init(&xe->pinned.lock); INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); INIT_LIST_HEAD(&xe->pinned.external_vram); @@ -570,37 +562,6 @@ void xe_device_shutdown(struct xe_device *xe) { } -void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q) -{ - mutex_lock(&xe->persistent_engines.lock); - list_add_tail(&q->persistent.link, &xe->persistent_engines.list); - mutex_unlock(&xe->persistent_engines.lock); -} - -void xe_device_remove_persistent_exec_queues(struct xe_device *xe, - struct xe_exec_queue *q) -{ - mutex_lock(&xe->persistent_engines.lock); - if (!list_empty(&q->persistent.link)) - list_del(&q->persistent.link); - mutex_unlock(&xe->persistent_engines.lock); -} - -static void device_kill_persistent_exec_queues(struct xe_device *xe, - struct xe_file *xef) -{ - struct xe_exec_queue *q, *next; - - mutex_lock(&xe->persistent_engines.lock); - list_for_each_entry_safe(q, next, &xe->persistent_engines.list, - persistent.link) - if (q->persistent.xef == xef) { - xe_exec_queue_kill(q); - list_del_init(&q->persistent.link); - } - mutex_unlock(&xe->persistent_engines.lock); -} - void xe_device_wmb(struct xe_device *xe) { struct xe_gt *gt = xe_root_mmio_gt(xe); @@ -613,7 +574,7 @@ void xe_device_wmb(struct xe_device *xe) u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? - DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; + DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } bool xe_device_mem_access_ongoing(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3da83b233206..08d8b72c7731 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -42,10 +42,6 @@ int xe_device_probe(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); -void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q); -void xe_device_remove_persistent_exec_queues(struct xe_device *xe, - struct xe_exec_queue *q); - void xe_device_wmb(struct xe_device *xe); static inline struct xe_file *to_xe_file(const struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5dc9127a2029..e8491979a6f2 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -341,14 +341,6 @@ struct xe_device { struct mutex lock; } usm; - /** @persistent_engines: engines that are closed but still running */ - struct { - /** @lock: protects persistent engines */ - struct mutex lock; - /** @list: list of persistent engines */ - struct list_head list; - } persistent_engines; - /** @pinned: pinned BO state */ struct { /** @lock: protected pinned BO list state */ diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c index 74391d9b11ae..e4db069f0db3 100644 --- a/drivers/gpu/drm/xe/xe_display.c +++ b/drivers/gpu/drm/xe/xe_display.c @@ -134,8 +134,6 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - int err; - if (!xe->info.enable_display) return 0; @@ -145,10 +143,6 @@ int xe_display_init_nommio(struct xe_device *xe) /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(xe); - err = intel_power_domains_init(xe); - if (err) - return err; - return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe); } diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 64ed303728fd..da2627ed6ae7 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -175,7 +175,7 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } -const struct dma_buf_ops xe_dmabuf_ops = { +static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, .pin = xe_dma_buf_pin, diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 82d1305e831f..6040e4d22b28 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo, static void show_meminfo(struct drm_printer *p, struct drm_file *file) { - static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = { - [XE_PL_SYSTEM] = "system", - [XE_PL_TT] = "gtt", - [XE_PL_VRAM0] = "vram0", - [XE_PL_VRAM1] = "vram1", - [4 ... 6] = NULL, - [XE_PL_STOLEN] = "stolen" - }; struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; struct xe_file *xef = file->driver_priv; struct ttm_device *bdev = &xef->xe->ttm; @@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) spin_unlock(&client->bos_lock); for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!mem_type_to_name[mem_type]) + if (!xe_mem_type_to_name[mem_type]) continue; man = ttm_manager_type(bdev, mem_type); @@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) DRM_GEM_OBJECT_RESIDENT | (mem_type != XE_PL_SYSTEM ? 0 : DRM_GEM_OBJECT_PURGEABLE), - mem_type_to_name[mem_type]); + xe_mem_type_to_name[mem_type]); } } } diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b853feed9ccc..17f26952e665 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -111,7 +111,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0; + u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; struct dma_fence *rebind_fence; struct xe_vm *vm; @@ -157,6 +157,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; + + if (xe_sync_is_ufence(&syncs[i])) + num_ufence++; + } + + if (XE_IOCTL_DBG(xe, num_ufence > 1)) { + err = -EINVAL; + goto err_syncs; } if (xe_exec_queue_is_parallel(q)) { diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index bcfc4127c7c5..49223026c89f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -60,7 +60,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->fence_irq = >->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->persistent.link); INIT_LIST_HEAD(&q->compute.link); INIT_LIST_HEAD(&q->multi_gt_link); @@ -310,102 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return q->ops->set_timeslice(q, value); } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_preempt_timeout(q, value); -} - -static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm))) - return -EINVAL; - - if (value) - q->flags |= EXEC_QUEUE_FLAG_PERSISTENT; - else - q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; - - return 0; -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_job_timeout(q, value); -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -413,12 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -437,10 +334,15 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); + if (!exec_queue_set_property_funcs[idx]) + return -EINVAL; + return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); } @@ -704,9 +606,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, - xe_vm_in_lr_mode(vm) ? 0 : - EXEC_QUEUE_FLAG_PERSISTENT); + args->width, hwe, 0); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) @@ -728,8 +628,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; } - q->persistent.xef = xef; - mutex_lock(&xef->exec_queue.lock); err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->exec_queue.lock); @@ -872,10 +770,7 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT)) - xe_exec_queue_kill(q); - else - xe_device_add_persistent_exec_queues(xe, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); @@ -926,20 +821,24 @@ void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) * @q: The exec queue * @vm: The VM the engine does a bind or exec for * - * Get last fence, does not take a ref + * Get last fence, takes a ref * * Returns: last fence if not signaled, dma fence stub if signaled */ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, struct xe_vm *vm) { + struct dma_fence *fence; + xe_exec_queue_last_fence_lockdep_assert(q, vm); if (q->last_fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) xe_exec_queue_last_fence_put(q, vm); - return q->last_fence ? q->last_fence : dma_fence_get_stub(); + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; } /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 8d4b7feb8c30..36f4901d8d7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -105,16 +105,6 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; - /** - * @persistent: persistent exec queue state - */ - struct { - /** @xef: file which this exec queue belongs to */ - struct xe_file *xef; - /** @link: link in list of persistent exec queues */ - struct list_head link; - } persistent; - union { /** * @parallel: parallel submission state @@ -160,16 +150,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @acc_notify: access counter notify */ - u32 acc_notify; - /** @acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 96b5224eb478..acb4d9f38fd7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); @@ -378,8 +378,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(xe, q); drm_sched_entity_fini(&exl->entity); drm_sched_fini(&exl->sched); kfree(exl); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3af2adec1295..35474ddbaf97 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -437,7 +437,10 @@ static int all_fw_domain_init(struct xe_gt *gt) * USM has its only SA pool to non-block behind user operations */ if (gt_to_xe(gt)->info.has_usm) { - gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16); + struct xe_device *xe = gt_to_xe(gt); + + gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), + IS_DGFX(xe) ? SZ_1M : SZ_512K, 16); if (IS_ERR(gt->usm.bb_pool)) { err = PTR_ERR(gt->usm.bb_pool); goto err_force_wake; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 9358f7336889..9fcae65b6469 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -145,10 +145,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) } if (xe_gt_is_media_type(gt)) { - sprintf(gtidle->name, "gt%d-mc\n", gt->info.id); + sprintf(gtidle->name, "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; } else { - sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); + sprintf(gtidle->name, "gt%d-rc", gt->info.id); gtidle->idle_residency = xe_guc_pc_rc6_residency; } diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 77925b35cf8d..8546cd3cc50d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -480,7 +480,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, * to synchronize with external clients (e.g., firmware), so a semaphore * register will also need to be taken. */ -static void mcr_lock(struct xe_gt *gt) +static void mcr_lock(struct xe_gt *gt) __acquires(>->mcr_lock) { struct xe_device *xe = gt_to_xe(gt); int ret = 0; @@ -500,7 +500,7 @@ static void mcr_lock(struct xe_gt *gt) drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } -static void mcr_unlock(struct xe_gt *gt) +static void mcr_unlock(struct xe_gt *gt) __releases(>->mcr_lock) { /* Release hardware semaphore - this is done by writing 1 to the register */ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 59a70d2e0a7a..73f08f1924df 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -165,7 +165,8 @@ retry_userptr: goto unlock_vm; } - if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) { + if (!xe_vma_is_userptr(vma) || + !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { downgrade_write(&vm->lock); write_locked = false; } @@ -181,11 +182,13 @@ retry_userptr: /* TODO: Validate fault */ if (xe_vma_is_userptr(vma) && write_locked) { + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del_init(&uvma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); - ret = xe_vma_userptr_pin_pages(vma); + ret = xe_vma_userptr_pin_pages(uvma); if (ret) goto unlock_vm; @@ -220,7 +223,7 @@ retry_userptr: dma_fence_put(fence); if (xe_vma_is_userptr(vma)) - ret = xe_vma_userptr_check_repin(vma); + ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); vma->usm.tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: @@ -332,7 +335,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); - pf_queue = >->usm.pf_queue[asid % NUM_PF_QUEUE]; + pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 7eef23a00d77..f4c485289dbe 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct drm_printer p = drm_err_printer(__func__); int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f71085228cb3..d91702592520 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -963,7 +963,9 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); if (xe->info.skip_guc_pc) { + xe_device_mem_access_get(xe); xe_gt_idle_disable_c6(pc_to_gt(pc)); + xe_device_mem_access_put(xe); return; } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 54ffcfcdd41f..f22ae717b0b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1028,8 +1028,6 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); - if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index a6094c81f2ad..a5de3e7b0bd6 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -217,13 +217,13 @@ struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, if (!fence) return ERR_PTR(-ENOMEM); - dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, - ctx->dma_fence_ctx, ctx->next_seqno++); - fence->ctx = ctx; fence->seqno_map = seqno_map; INIT_LIST_HEAD(&fence->irq_link); + dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, + ctx->dma_fence_ctx, ctx->next_seqno++); + trace_xe_hw_fence_create(fence); return fence; diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 6ef2aa1eae8b..174ed2185481 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -419,7 +419,7 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), - uval, 0); + uval, NULL); } static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b7fa3831b684..b38319d2801e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -21,10 +21,10 @@ #include "xe_map.h" #include "xe_vm.h" -#define CTX_VALID (1 << 0) -#define CTX_PRIVILEGE (1 << 8) -#define CTX_ADDRESSING_MODE_SHIFT 3 -#define LEGACY_64B_CONTEXT 3 +#define LRC_VALID (1 << 0) +#define LRC_PRIVILEGE (1 << 8) +#define LRC_ADDRESSING_MODE_SHIFT 3 +#define LRC_LEGACY_64B_CONTEXT 3 #define ENGINE_CLASS_SHIFT 61 #define ENGINE_INSTANCE_SHIFT 48 @@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -754,23 +752,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); - - lrc->desc = CTX_VALID; - lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT; + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + + lrc->desc = LRC_VALID; + lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; /* TODO: Priority */ /* While this appears to have something about privileged batches or * some such, it really just means PPGTT mode. */ if (vm) - lrc->desc |= CTX_PRIVILEGE; + lrc->desc |= LRC_PRIVILEGE; if (GRAPHICS_VERx100(xe) < 1250) { lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index e05e9e7282b6..70480c305602 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -170,11 +170,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - if (xe->info.has_usm) { - batch = tile->primary_gt->usm.bb_pool->bo; - m->usm_batch_base_ofs = m->batch_base_ofs; - } - for (i = 0; i < batch->size; i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { @@ -185,6 +180,24 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, entry); level++; } + if (xe->info.has_usm) { + xe_tile_assert(tile, batch->size == SZ_1M); + + batch = tile->primary_gt->usm.bb_pool->bo; + m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; + xe_tile_assert(tile, batch->size == SZ_512K); + + for (i = 0; i < batch->size; + i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : + XE_PAGE_SIZE) { + entry = vm->pt_ops->pte_encode_bo(batch, i, + pat_index, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, + entry); + level++; + } + } } else { u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); @@ -472,7 +485,7 @@ static void emit_pte(struct xe_migrate *m, /* Indirect access needs compression enabled uncached PAT index */ if (GRAPHICS_VERx100(xe) >= 2000) pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : - xe->pat.idx[XE_CACHE_NONE]; + xe->pat.idx[XE_CACHE_WB]; else pat_index = xe->pat.idx[XE_CACHE_WB]; @@ -760,14 +773,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, - src); + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); else - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, - dst); + emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, + &dst_it, src_L0, dst); if (copy_system_ccs) emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); @@ -1009,8 +1022,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, - dst); + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -1204,8 +1217,11 @@ static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q, } if (q) { fence = xe_exec_queue_last_fence_get(q, vm); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + dma_fence_put(fence); return false; + } + dma_fence_put(fence); } return true; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c8c5d74b6e90..02f7808f28ca 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } @@ -272,8 +272,8 @@ int xe_mmio_probe_vram(struct xe_device *xe) drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); + &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, + &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); /* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index de1030a47588..6653c045f3c9 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -20,8 +20,8 @@ struct xe_pt_dir { struct xe_pt pt; - /** @dir: Directory structure for the xe_pt_walk functionality */ - struct xe_ptw_dir dir; + /** @children: Array of page-table child nodes */ + struct xe_ptw *children[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -44,7 +44,7 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->dir.entries[index], struct xe_pt, base); + return container_of(pt_dir->children[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -65,6 +65,14 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, XE_PTE_NULL; } +static void xe_pt_free(struct xe_pt *pt) +{ + if (pt->level) + kfree(as_xe_pt_dir(pt)); + else + kfree(pt); +} + /** * xe_pt_create() - Create a page-table. * @vm: The vm to create for. @@ -85,15 +93,19 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, { struct xe_pt *pt; struct xe_bo *bo; - size_t size; int err; - size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + - XE_PDES * sizeof(struct xe_ptw *); - pt = kzalloc(size, GFP_KERNEL); + if (level) { + struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); + + pt = (dir) ? &dir->pt : NULL; + } else { + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + } if (!pt) return ERR_PTR(-ENOMEM); + pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | @@ -106,8 +118,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, goto err_kfree; } pt->bo = bo; - pt->level = level; - pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; + pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -116,7 +127,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, return pt; err_kfree: - kfree(pt); + xe_pt_free(pt); return ERR_PTR(err); } @@ -193,7 +204,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) deferred); } } - kfree(pt); + xe_pt_free(pt); } /** @@ -358,7 +369,7 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, struct iosys_map *map = &parent->bo->vmap; if (unlikely(xe_child)) - parent->base.dir->entries[offset] = &xe_child->base; + parent->base.children[offset] = &xe_child->base; xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -488,10 +499,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, * this device *requires* 64K PTE size for VRAM, fail. */ if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { + xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; pte |= XE_PTE_PS64; - else if (XE_WARN_ON(xe_walk->needs_64K)) + } else if (XE_WARN_ON(xe_walk->needs_64K)) { return -EINVAL; + } } ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); @@ -534,13 +547,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, *child = &xe_child->base; /* - * Prefer the compact pagetable layout for L0 if possible. + * Prefer the compact pagetable layout for L0 if possible. Only + * possible if VMA covers entire 2MB region as compact 64k and + * 4k pages cannot be mixed within a 2MB region. * TODO: Suballocate the pt bo to avoid wasting a lot of * memory. */ if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && covers && xe_pt_scan_64K(addr, next, xe_walk)) { walk->shifts = xe_compact_pt_shifts; + xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; flags |= XE_PDE_64K; xe_child->is_compact = true; } @@ -618,8 +634,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma)) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), - &curs); + xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, + xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); @@ -853,7 +869,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma, xe_pt_destroy(xe_pt_entry(pt_dir, j_), xe_vma_vm(vma)->flags, deferred); - pt_dir->dir.entries[j_] = &newpte->base; + pt_dir->children[j_] = &newpte->base; } kfree(entries[i].pt_entries); } @@ -906,17 +922,17 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe, #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT -static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) +static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { - u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; + u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; static u32 count; if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_vm *vm = xe_vma_vm(&uvma->vma); - vma->userptr.divisor = divisor << 1; + uvma->userptr.divisor = divisor << 1; spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&vma->userptr.invalidate_link, + list_move_tail(&uvma->userptr.invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); return true; @@ -927,7 +943,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) #else -static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) +static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { return false; } @@ -1000,9 +1016,9 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_pt_migrate_pt_update *userptr_update = container_of(pt_update, typeof(*userptr_update), base); - struct xe_vma *vma = pt_update->vma; - unsigned long notifier_seq = vma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); + unsigned long notifier_seq = uvma->userptr.notifier_seq; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); int err = xe_pt_vm_dependencies(pt_update->job, &vm->rftree[pt_update->tile_id], pt_update->start, @@ -1023,7 +1039,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) */ do { down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&vma->userptr.notifier, + if (!mmu_interval_read_retry(&uvma->userptr.notifier, notifier_seq)) break; @@ -1032,11 +1048,11 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) if (userptr_update->bind) return -EAGAIN; - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); } while (true); /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { + if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { up_read(&vm->userptr.notifier_lock); return -EAGAIN; } @@ -1297,7 +1313,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue vma->tile_present |= BIT(tile->id); if (bind_pt_update.locked) { - vma->userptr.initial_bind = true; + to_userptr_vma(vma)->userptr.initial_bind = true; up_read(&vm->userptr.notifier_lock); xe_bo_put_commit(&deferred); } @@ -1507,7 +1523,7 @@ xe_pt_commit_unbind(struct xe_vma *vma, xe_pt_destroy(xe_pt_entry(pt_dir, i), xe_vma_vm(vma)->flags, deferred); - pt_dir->dir.entries[i] = NULL; + pt_dir->children[i] = NULL; } } } @@ -1642,7 +1658,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu if (!vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } up_read(&vm->userptr.notifier_lock); diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index 8f6c8d063f39..b8b3d2aea492 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,7 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL; + struct xe_ptw **entries = parent->children ? parent->children : NULL; const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index ec3d1e9efa6d..5ecc4d2f0f65 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -8,28 +8,15 @@ #include <linux/pagewalk.h> #include <linux/types.h> -struct xe_ptw_dir; - /** * struct xe_ptw - base class for driver pagetable subclassing. - * @dir: Pointer to an array of children if any. + * @children: Pointer to an array of children if any. * * Drivers could subclass this, and if it's a page-directory, typically - * embed the xe_ptw_dir::entries array in the same allocation. + * embed an array of xe_ptw pointers. */ struct xe_ptw { - struct xe_ptw_dir *dir; -}; - -/** - * struct xe_ptw_dir - page directory structure - * @entries: Array holding page directory children. - * - * It is the responsibility of the user to ensure @entries is - * correctly sized. - */ -struct xe_ptw_dir { - struct xe_ptw *entries[0]; + struct xe_ptw **children; }; /** diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 9b35673b286c..7e924faeeea0 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -459,21 +459,21 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); } -static void __user *copy_mask(void __user *ptr, - struct drm_xe_query_topology_mask *topo, - void *mask, size_t mask_size) +static int copy_mask(void __user **ptr, + struct drm_xe_query_topology_mask *topo, + void *mask, size_t mask_size) { topo->num_bytes = mask_size; - if (copy_to_user(ptr, topo, sizeof(*topo))) - return ERR_PTR(-EFAULT); - ptr += sizeof(topo); + if (copy_to_user(*ptr, topo, sizeof(*topo))) + return -EFAULT; + *ptr += sizeof(topo); - if (copy_to_user(ptr, mask, mask_size)) - return ERR_PTR(-EFAULT); - ptr += mask_size; + if (copy_to_user(*ptr, mask, mask_size)) + return -EFAULT; + *ptr += mask_size; - return ptr; + return 0; } static int query_gt_topology(struct xe_device *xe, @@ -493,28 +493,28 @@ static int query_gt_topology(struct xe_device *xe, } for_each_gt(gt, xe, id) { + int err; + topo.gt_id = id; topo.type = DRM_XE_TOPO_DSS_GEOMETRY; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.g_dss_mask, - sizeof(gt->fuse_topo.g_dss_mask)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask, + sizeof(gt->fuse_topo.g_dss_mask)); + if (err) + return err; topo.type = DRM_XE_TOPO_DSS_COMPUTE; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.c_dss_mask, - sizeof(gt->fuse_topo.c_dss_mask)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask, + sizeof(gt->fuse_topo.c_dss_mask)); + if (err) + return err; topo.type = DRM_XE_TOPO_EU_PER_DSS; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.eu_mask_per_dss, - sizeof(gt->fuse_topo.eu_mask_per_dss)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, + gt->fuse_topo.eu_mask_per_dss, + sizeof(gt->fuse_topo.eu_mask_per_dss)); + if (err) + return err; } return 0; diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c index d35d9ec58e86..372378e89e98 100644 --- a/drivers/gpu/drm/xe/xe_range_fence.c +++ b/drivers/gpu/drm/xe/xe_range_fence.c @@ -151,6 +151,11 @@ xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last) return xe_range_fence_tree_iter_next(rfence, start, last); } +static void xe_range_fence_free(struct xe_range_fence *rfence) +{ + kfree(rfence); +} + const struct xe_range_fence_ops xe_range_fence_kfree_ops = { - .free = (void (*)(struct xe_range_fence *rfence)) kfree, + .free = xe_range_fence_free, }; diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 01106a1156ad..4e2ccad0e52f 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -274,7 +274,6 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) struct dma_fence *fence; fence = xe_exec_queue_last_fence_get(job->q, vm); - dma_fence_get(fence); return drm_sched_job_add_dependency(&job->drm, fence); } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index e4c220cf9115..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -307,7 +309,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, /* Easy case... */ if (!num_in_fence) { fence = xe_exec_queue_last_fence_get(q, vm); - dma_fence_get(fence); return fence; } @@ -322,7 +323,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, } } fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); - dma_fence_get(fences[current_fence - 1]); cf = dma_fence_array_create(num_in_fence, fences, vm->composite_fence_ctx, vm->composite_fence_seqno++, @@ -342,3 +342,39 @@ err_out: return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index d284afbe917c..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -33,4 +33,13 @@ struct dma_fence * xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct xe_exec_queue *q, struct xe_vm *vm); +static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) +{ + return !!sync->ufence; +} + +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 044c20881de7..0650b2fa75ef 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -167,9 +167,10 @@ int xe_tile_init_noalloc(struct xe_tile *tile) goto err_mem_access; tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); - if (IS_ERR(tile->mem.kernel_bb_pool)) + if (IS_ERR(tile->mem.kernel_bb_pool)) { err = PTR_ERR(tile->mem.kernel_bb_pool); - + goto err_mem_access; + } xe_wa_apply_tile_workarounds(tile); xe_tile_sysfs_init(tile); diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 95163c303f3e..4ddc55527f9a 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,6 +12,7 @@ #include <linux/tracepoint.h> #include <linux/types.h> +#include "xe_bo.h" #include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" @@ -26,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, TP_ARGS(fence), TP_STRUCT__entry( - __field(u64, fence) + __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( - __entry->fence = (u64)fence; + __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=0x%016llx, seqno=%d", + TP_printk("fence=%p, seqno=%d", __entry->fence, __entry->seqno) ); @@ -82,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo, TP_STRUCT__entry( __field(size_t, size) __field(u32, flags) - __field(u64, vm) + __field(struct xe_vm *, vm) ), TP_fast_assign( __entry->size = bo->size; __entry->flags = bo->flags; - __entry->vm = (unsigned long)bo->vm; + __entry->vm = bo->vm; ), - TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", + TP_printk("size=%zu, flags=0x%02x, vm=%p", __entry->size, __entry->flags, __entry->vm) ); @@ -100,9 +101,31 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); -DEFINE_EVENT(xe_bo, xe_bo_move, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __array(char, device_id, 12) + __field(bool, move_lacks_source) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; + ), + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); DECLARE_EVENT_CLASS(xe_exec_queue, @@ -327,16 +350,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_STRUCT__entry( __field(u64, ctx) __field(u32, seqno) - __field(u64, fence) + __field(struct xe_hw_fence *, fence) ), TP_fast_assign( __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; - __entry->fence = (unsigned long)fence; + __entry->fence = fence; ), - TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", __entry->ctx, __entry->fence, __entry->seqno) ); @@ -365,7 +388,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_ARGS(vma), TP_STRUCT__entry( - __field(u64, vma) + __field(struct xe_vma *, vma) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -373,14 +396,14 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( - __entry->vma = (unsigned long)vma; + __entry->vma = vma; __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", __entry->vma, __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -465,16 +488,16 @@ DECLARE_EVENT_CLASS(xe_vm, TP_ARGS(vm), TP_STRUCT__entry( - __field(u64, vm) + __field(struct xe_vm *, vm) __field(u32, asid) ), TP_fast_assign( - __entry->vm = (unsigned long)vm; + __entry->vm = vm; __entry->asid = vm->usm.asid; ), - TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + TP_printk("vm=%p, asid=0x%05x", __entry->vm, __entry->asid) ); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 10b6995fbf29..3b21afe5b488 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -37,8 +37,6 @@ #include "generated/xe_wa_oob.h" #include "xe_wa.h" -#define TEST_VM_ASYNC_OPS_ERROR - static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { return vm->gpuvm.r_obj; @@ -46,7 +44,7 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) /** * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @vma: The userptr vma + * @uvma: The userptr vma * * Check if the userptr vma has been invalidated since last successful * repin. The check is advisory only and can the function can be called @@ -56,15 +54,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) * * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. */ -int xe_vma_userptr_check_repin(struct xe_vma *vma) +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { - return mmu_interval_check_retry(&vma->userptr.notifier, - vma->userptr.notifier_seq) ? + return mmu_interval_check_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq) ? -EAGAIN : 0; } -int xe_vma_userptr_pin_pages(struct xe_vma *vma) +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; @@ -80,30 +80,30 @@ retry: if (vma->gpuva.flags & XE_VMA_DESTROYED) return 0; - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); - if (notifier_seq == vma->userptr.notifier_seq) + notifier_seq = mmu_interval_read_begin(&userptr->notifier); + if (notifier_seq == userptr->notifier_seq) return 0; pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; - if (vma->userptr.sg) { + if (userptr->sg) { dma_unmap_sgtable(xe->drm.dev, - vma->userptr.sg, + userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, 0); - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; } pinned = ret = 0; if (in_kthread) { - if (!mmget_not_zero(vma->userptr.notifier.mm)) { + if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto mm_closed; } - kthread_use_mm(vma->userptr.notifier.mm); + kthread_use_mm(userptr->notifier.mm); } while (pinned < num_pages) { @@ -112,43 +112,40 @@ retry: num_pages - pinned, read_only ? 0 : FOLL_WRITE, &pages[pinned]); - if (ret < 0) { - if (in_kthread) - ret = 0; + if (ret < 0) break; - } pinned += ret; ret = 0; } if (in_kthread) { - kthread_unuse_mm(vma->userptr.notifier.mm); - mmput(vma->userptr.notifier.mm); + kthread_unuse_mm(userptr->notifier.mm); + mmput(userptr->notifier.mm); } mm_closed: if (ret) goto out; - ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages, + ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages, pinned, 0, (u64)pinned << PAGE_SHIFT, xe_sg_segment_size(xe->drm.dev), GFP_KERNEL); if (ret) { - vma->userptr.sg = NULL; + userptr->sg = NULL; goto out; } - vma->userptr.sg = &vma->userptr.sgt; + userptr->sg = &userptr->sgt; - ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, + ret = dma_map_sgtable(xe->drm.dev, userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); if (ret) { - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; goto out; } @@ -167,8 +164,8 @@ out: kvfree(pages); if (!(ret < 0)) { - vma->userptr.notifier_seq = notifier_seq; - if (xe_vma_userptr_check_repin(vma) == -EAGAIN) + userptr->notifier_seq = notifier_seq; + if (xe_vma_userptr_check_repin(uvma) == -EAGAIN) goto retry; } @@ -635,7 +632,9 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) { - struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); + struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); + struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; @@ -651,7 +650,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); /* No need to stop gpu access if the userptr is not yet bound. */ - if (!vma->userptr.initial_bind) { + if (!userptr->initial_bind) { up_write(&vm->userptr.notifier_lock); return true; } @@ -663,7 +662,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, if (!xe_vm_in_fault_mode(vm) && !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&vma->userptr.invalidate_link, + list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } @@ -703,7 +702,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { int xe_vm_userptr_pin(struct xe_vm *vm) { - struct xe_vma *vma, *next; + struct xe_userptr_vma *uvma, *next; int err = 0; LIST_HEAD(tmp_evict); @@ -711,22 +710,23 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { - list_del_init(&vma->userptr.invalidate_link); - list_move_tail(&vma->combined_links.userptr, + list_del_init(&uvma->userptr.invalidate_link); + list_move_tail(&uvma->userptr.repin_link, &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); /* Pin and move to temporary list */ - list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, - combined_links.userptr) { - err = xe_vma_userptr_pin_pages(vma); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + err = xe_vma_userptr_pin_pages(uvma); if (err < 0) return err; - list_move_tail(&vma->combined_links.userptr, &vm->rebind_list); + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list); } return 0; @@ -782,6 +782,14 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) return fence; } +static void xe_vma_free(struct xe_vma *vma) +{ + if (xe_vma_is_userptr(vma)) + kfree(to_userptr_vma(vma)); + else + kfree(vma); +} + #define VMA_CREATE_FLAG_READ_ONLY BIT(0) #define VMA_CREATE_FLAG_IS_NULL BIT(1) @@ -800,14 +808,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); - if (!bo && !is_null) /* userptr */ + /* + * Allocate and ensure that the xe_vma_is_userptr() return + * matches what was allocated. + */ + if (!bo && !is_null) { + struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); + + if (!uvma) + return ERR_PTR(-ENOMEM); + + vma = &uvma->vma; + } else { vma = kzalloc(sizeof(*vma), GFP_KERNEL); - else - vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr), - GFP_KERNEL); - if (!vma) { - vma = ERR_PTR(-ENOMEM); - return vma; + if (!vma) + return ERR_PTR(-ENOMEM); + + if (is_null) + vma->gpuva.flags |= DRM_GPUVA_SPARSE; + if (bo) + vma->gpuva.gem.obj = &bo->ttm.base; } INIT_LIST_HEAD(&vma->combined_links.rebind); @@ -818,8 +838,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.va.range = end - start + 1; if (read_only) vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -836,35 +854,35 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); if (IS_ERR(vm_bo)) { - kfree(vma); + xe_vma_free(vma); return ERR_CAST(vm_bo); } drm_gpuvm_bo_extobj_add(vm_bo); drm_gem_object_get(&bo->ttm.base); - vma->gpuva.gem.obj = &bo->ttm.base; vma->gpuva.gem.offset = bo_offset_or_userptr; drm_gpuva_link(&vma->gpuva, vm_bo); drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null) { + struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; u64 size = end - start + 1; int err; - INIT_LIST_HEAD(&vma->userptr.invalidate_link); + INIT_LIST_HEAD(&userptr->invalidate_link); + INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; - err = mmu_interval_notifier_insert(&vma->userptr.notifier, + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, xe_vma_userptr(vma), size, &vma_userptr_notifier_ops); if (err) { - kfree(vma); - vma = ERR_PTR(err); - return vma; + xe_vma_free(vma); + return ERR_PTR(err); } - vma->userptr.notifier_seq = LONG_MAX; + userptr->notifier_seq = LONG_MAX; } xe_vm_get(vm); @@ -879,14 +897,21 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { - if (vma->userptr.sg) { + struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + + if (userptr->sg) { dma_unmap_sgtable(xe->drm.dev, - vma->userptr.sg, + userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, 0); - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; } /* @@ -894,7 +919,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * the notifer until we're sure the GPU is not accessing * them anymore */ - mmu_interval_notifier_remove(&vma->userptr.notifier); + mmu_interval_notifier_remove(&userptr->notifier); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); @@ -902,7 +927,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) xe_bo_put(xe_vma_bo(vma)); } - kfree(vma); + xe_vma_free(vma); } static void vma_destroy_work_func(struct work_struct *w) @@ -933,7 +958,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); - list_del(&vma->userptr.invalidate_link); + list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); @@ -975,9 +1000,16 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, int err; XE_WARN_ON(!vm); - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - if (!err && bo && !bo->vm) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + if (num_shared) + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); + else + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) { + if (num_shared) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + else + err = drm_exec_lock_obj(exec, &bo->ttm.base); + } return err; } @@ -1581,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1714,6 +1756,21 @@ err_fences: return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i < num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1721,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); @@ -1855,10 +1919,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->vm.lock); - if (err) { - xe_vm_close_and_put(vm); - return err; - } + if (err) + goto err_close_and_put; if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); @@ -1866,11 +1928,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); mutex_unlock(&xe->usm.lock); - if (err < 0) { - xe_vm_close_and_put(vm); - return err; - } - err = 0; + if (err < 0) + goto err_free_id; + vm->usm.asid = asid; } @@ -1888,6 +1948,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, #endif return 0; + +err_free_id: + mutex_lock(&xef->vm.lock); + xa_erase(&xef->vm.xa, id); + mutex_unlock(&xef->vm.lock); +err_close_and_put: + xe_vm_close_and_put(vm); + + return err; } int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, @@ -1954,6 +2023,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, xe_exec_queue_last_fence_get(wait_exec_queue, vm); xe_sync_entry_signal(&syncs[i], NULL, fence); + dma_fence_put(fence); } } @@ -2034,7 +2104,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; - struct xe_vma_op *op; struct drm_gpuvm_bo *vm_bo; int err; @@ -2081,23 +2150,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, if (IS_ERR(ops)) return ops; -#ifdef TEST_VM_ASYNC_OPS_ERROR - if (operation & FORCE_ASYNC_OP_ERROR) { - op = list_first_entry_or_null(&ops->list, struct xe_vma_op, - base.entry); - if (op) - op->inject_error = true; - } -#endif - drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { @@ -2145,7 +2201,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, drm_exec_fini(&exec); if (xe_vma_is_userptr(vma)) { - err = xe_vma_userptr_pin_pages(vma); + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); if (err) { prep_vma_destroy(vm, vma, false); xe_vma_destroy_unlocked(vma); @@ -2167,13 +2223,17 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) { if (vma->gpuva.flags & XE_VMA_PTE_1G) return SZ_1G; - else if (vma->gpuva.flags & XE_VMA_PTE_2M) + else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) return SZ_2M; + else if (vma->gpuva.flags & XE_VMA_PTE_64K) + return SZ_64K; + else if (vma->gpuva.flags & XE_VMA_PTE_4K) + return SZ_4K; - return SZ_4K; + return SZ_1G; /* Uninitialized, used max size */ } -static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) +static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) { switch (size) { case SZ_1G: @@ -2182,9 +2242,13 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) case SZ_2M: vma->gpuva.flags |= XE_VMA_PTE_2M; break; + case SZ_64K: + vma->gpuva.flags |= XE_VMA_PTE_64K; + break; + case SZ_4K: + vma->gpuva.flags |= XE_VMA_PTE_4K; + break; } - - return SZ_4K; } static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) @@ -2282,8 +2346,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; @@ -2414,7 +2476,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2500,13 +2562,25 @@ retry_userptr: } drm_exec_fini(&exec); - if (err == -EAGAIN && xe_vma_is_userptr(vma)) { + if (err == -EAGAIN) { lockdep_assert_held_write(&vm->lock); - err = xe_vma_userptr_pin_pages(vma); - if (!err) - goto retry_userptr; - trace_xe_vma_fail(vma); + if (op->base.op == DRM_GPUVA_OP_REMAP) { + if (!op->remap.unmap_done) + vma = gpuva_to_vma(op->base.remap.unmap->va); + else if (op->remap.prev) + vma = op->remap.prev; + else + vma = op->remap.next; + } + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (!err) + goto retry_userptr; + + trace_xe_vma_fail(vma); + } } return err; @@ -2518,13 +2592,6 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) lockdep_assert_held_write(&vm->lock); -#ifdef TEST_VM_ASYNC_OPS_ERROR - if (op->inject_error) { - op->inject_error = false; - return -ENOMEM; - } -#endif - switch (op->base.op) { case DRM_GPUVA_OP_MAP: ret = __xe_vma_op_execute(vm, op->map.vma, op); @@ -2639,7 +2706,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, { int i; - for (i = num_ops_list - 1; i; ++i) { + for (i = num_ops_list - 1; i >= 0; --i) { struct drm_gpuva_ops *__ops = ops[i]; struct drm_gpuva_op *__op; @@ -2684,21 +2751,11 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#ifdef TEST_VM_ASYNC_OPS_ERROR -#define SUPPORTED_FLAGS \ - (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff) -#else -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ - 0xffff) -#endif +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ + DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2710,16 +2767,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2809,7 +2866,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2846,7 +2903,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_gpuva_ops **ops = NULL; struct xe_vm *vm; struct xe_exec_queue *q = NULL; - u32 num_syncs; + u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; LIST_HEAD(ops_list); @@ -2897,13 +2954,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err = -ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -2983,6 +3042,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); if (err) goto free_syncs; + + if (xe_sync_is_ufence(&syncs[num_syncs])) + num_ufence++; + } + + if (XE_IOCTL_DBG(xe, num_ufence > 1)) { + err = -EINVAL; + goto free_syncs; } if (!args->num_binds) { @@ -3036,10 +3103,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3063,10 +3130,10 @@ put_exec_queue: if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } @@ -3125,8 +3192,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { WARN_ON_ONCE(!mmu_interval_check_retry - (&vma->userptr.notifier, - vma->userptr.notifier_seq)); + (&to_userptr_vma(vma)->userptr.notifier, + to_userptr_vma(vma)->userptr.notifier_seq)); WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), DMA_RESV_USAGE_BOOKKEEP)); @@ -3187,11 +3254,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) if (is_null) { addr = 0; } else if (is_userptr) { + struct sg_table *sg = to_userptr_vma(vma)->userptr.sg; struct xe_res_cursor cur; - if (vma->userptr.sg) { - xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, - &cur); + if (sg) { + xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur); addr = xe_res_dma(&cur); } else { addr = 0; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index cf2f96e8c1ab..9654a0612fc2 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -160,6 +160,18 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); } +/** + * to_userptr_vma() - Return a pointer to an embedding userptr vma + * @vma: Pointer to the embedded struct xe_vma + * + * Return: Pointer to the embedding userptr vma + */ +static inline struct xe_userptr_vma *to_userptr_vma(struct xe_vma *vma) +{ + xe_assert(xe_vma_vm(vma)->xe, xe_vma_is_userptr(vma)); + return container_of(vma, struct xe_userptr_vma, vma); +} + u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile); int xe_vm_create_ioctl(struct drm_device *dev, void *data, @@ -224,9 +236,9 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -int xe_vma_userptr_pin_pages(struct xe_vma *vma); +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); -int xe_vma_userptr_check_repin(struct xe_vma *vma); +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 63e8a50b88e9..7300eea5394b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,11 +19,9 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; -#define TEST_VM_ASYNC_OPS_ERROR -#define FORCE_ASYNC_OP_ERROR BIT(31) - #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS #define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1) #define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2) @@ -32,11 +30,15 @@ struct xe_vm; #define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5) #define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) +#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8) +#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9) /** struct xe_userptr - User pointer */ struct xe_userptr { /** @invalidate_link: Link for the vm::userptr.invalidated list */ struct list_head invalidate_link; + /** @userptr: link into VM repin list if userptr. */ + struct list_head repin_link; /** * @notifier: MMU notifier for user pointer (invalidation call back) */ @@ -68,8 +70,6 @@ struct xe_vma { * resv. */ union { - /** @userptr: link into VM repin list if userptr. */ - struct list_head userptr; /** @rebind: link into VM if this VMA needs rebinding. */ struct list_head rebind; /** @destroy: link to contested list when VM is being closed. */ @@ -107,9 +107,19 @@ struct xe_vma { u16 pat_index; /** - * @userptr: user pointer state, only allocated for VMAs that are - * user pointers + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. */ + struct xe_user_fence *ufence; +}; + +/** + * struct xe_userptr_vma - A userptr vma subclass + * @vma: The vma. + * @userptr: Additional userptr information. + */ +struct xe_userptr_vma { + struct xe_vma vma; struct xe_userptr userptr; }; @@ -285,10 +295,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @pat_index: The pat index to use for this operation. */ @@ -356,11 +362,6 @@ struct xe_vma_op { /** @flags: operation flags */ enum xe_vma_op_flags flags; -#ifdef TEST_VM_ASYNC_OPS_ERROR - /** @inject_error: inject error to test async op error handling */ - bool inject_error; -#endif - union { /** @map: VMA map operation specific data */ struct xe_vma_op_map map; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 42fd504abbcd..89983d7d73ca 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = { .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, .reserve_vblank_syncpts = false, + .skip_reset_assert = true, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev) host1x_intr_stop(host); host1x_syncpt_save(host); - err = reset_control_bulk_assert(host->nresets, host->resets); - if (err) { - dev_err(dev, "failed to assert reset: %d\n", err); - goto resume_host1x; - } + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } - usleep_range(1000, 2000); + usleep_range(1000, 2000); + } clk_disable_unprepare(host->clk); reset_control_bulk_release(host->nresets, host->resets); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index c8e302de7625..925a118db23f 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -116,6 +116,12 @@ struct host1x_info { * the display driver disables VBLANK increments. */ bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. + */ + bool skip_reset_assert; }; struct host1x { |