diff options
Diffstat (limited to 'drivers/gpu/drm')
290 files changed, 5366 insertions, 1903 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f92cd7892711..cc637343d87b 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -70,6 +70,7 @@ drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \ drm_privacy_screen.o \ drm_privacy_screen_x86.o +drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 712075a491f2..798d0e9a60b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ - amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o + amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ + amdgpu_ring_mux.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f50e3ba4d7a5..0040deaf8a83 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/kthread.h> #include <linux/workqueue.h> +#include <linux/mmu_notifier.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" @@ -65,6 +66,7 @@ struct kgd_mem { struct mutex lock; struct amdgpu_bo *bo; struct dma_buf *dmabuf; + struct hmm_range *range; struct list_head attachments; /* protected by amdkfd_process_info.lock */ struct ttm_validate_buffer validate_list; @@ -75,7 +77,7 @@ struct kgd_mem { uint32_t alloc_flags; - atomic_t invalid; + uint32_t invalid; struct amdkfd_process_info *process_info; struct amdgpu_sync sync; @@ -131,7 +133,8 @@ struct amdkfd_process_info { struct amdgpu_amdkfd_fence *eviction_fence; /* MMU-notifier related fields */ - atomic_t evicted_bos; + struct mutex notifier_lock; + uint32_t evicted_bos; struct delayed_work restore_userptr_work; struct pid *pid; bool block_mmu_notifications; @@ -180,7 +183,8 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); +int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, + unsigned long cur_seq, struct kgd_mem *mem); #else static inline bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) @@ -201,7 +205,8 @@ int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) } static inline -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, + unsigned long cur_seq, struct kgd_mem *mem) { return 0; } @@ -265,8 +270,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_ (&((struct amdgpu_fpriv *) \ ((struct drm_file *)(drm_priv))->driver_priv)->vm) +int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, + struct file *filp, u32 pasid); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, - struct file *filp, u32 pasid, + struct file *filp, void **process_info, struct dma_fence **ef); void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3a763916a5a1..b15091d8310d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -964,7 +964,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, * later stage when it is scheduled by another ioctl called by * CRIU master process for the target pid for restore. */ - atomic_inc(&mem->invalid); + mutex_lock(&process_info->notifier_lock); + mem->invalid++; + mutex_unlock(&process_info->notifier_lock); mutex_unlock(&process_info->lock); return 0; } @@ -1301,6 +1303,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, return -ENOMEM; mutex_init(&info->lock); + mutex_init(&info->notifier_lock); INIT_LIST_HEAD(&info->vm_list_head); INIT_LIST_HEAD(&info->kfd_bo_list); INIT_LIST_HEAD(&info->userptr_valid_list); @@ -1317,7 +1320,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, } info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); - atomic_set(&info->evicted_bos, 0); INIT_DELAYED_WORK(&info->restore_userptr_work, amdgpu_amdkfd_restore_userptr_worker); @@ -1372,6 +1374,7 @@ reserve_pd_fail: put_pid(info->pid); create_evict_fence_fail: mutex_destroy(&info->lock); + mutex_destroy(&info->notifier_lock); kfree(info); } return ret; @@ -1426,10 +1429,9 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); } -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, - struct file *filp, u32 pasid, - void **process_info, - struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, + struct file *filp, u32 pasid) + { struct amdgpu_fpriv *drv_priv; struct amdgpu_vm *avm; @@ -1440,10 +1442,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, return ret; avm = &drv_priv->vm; - /* Already a compute VM? */ - if (avm->process_info) - return -EINVAL; - /* Free the original amdgpu allocated pasid, * will be replaced with kfd allocated pasid. */ @@ -1452,14 +1450,36 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, amdgpu_vm_set_pasid(adev, avm, 0); } - /* Convert VM into a compute VM */ - ret = amdgpu_vm_make_compute(adev, avm); + ret = amdgpu_vm_set_pasid(adev, avm, pasid); if (ret) return ret; - ret = amdgpu_vm_set_pasid(adev, avm, pasid); + return 0; +} + +int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, + struct file *filp, + void **process_info, + struct dma_fence **ef) +{ + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; + int ret; + + ret = amdgpu_file_to_fpriv(filp, &drv_priv); if (ret) return ret; + avm = &drv_priv->vm; + + /* Already a compute VM? */ + if (avm->process_info) + return -EINVAL; + + /* Convert VM into a compute VM */ + ret = amdgpu_vm_make_compute(adev, avm); + if (ret) + return ret; + /* Initialize KFD part of the VM and process info */ ret = init_kfd_vm(avm, process_info, ef); if (ret) @@ -1496,6 +1516,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, cancel_delayed_work_sync(&process_info->restore_userptr_work); put_pid(process_info->pid); mutex_destroy(&process_info->lock); + mutex_destroy(&process_info->notifier_lock); kfree(process_info); } } @@ -1548,7 +1569,9 @@ int amdgpu_amdkfd_criu_resume(void *p) mutex_lock(&pinfo->lock); pr_debug("scheduling work\n"); - atomic_inc(&pinfo->evicted_bos); + mutex_lock(&pinfo->notifier_lock); + pinfo->evicted_bos++; + mutex_unlock(&pinfo->notifier_lock); if (!READ_ONCE(pinfo->block_mmu_notifications)) { ret = -EINVAL; goto out_unlock; @@ -1773,8 +1796,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); - /* No more MMU notifiers */ - amdgpu_hmm_unregister(mem->bo); + /* Cleanup user pages and MMU notifiers */ + if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { + amdgpu_hmm_unregister(mem->bo); + mutex_lock(&process_info->notifier_lock); + amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); + mutex_unlock(&process_info->notifier_lock); + } ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) @@ -1864,6 +1892,16 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( */ mutex_lock(&mem->process_info->lock); + /* Lock notifier lock. If we find an invalid userptr BO, we can be + * sure that the MMU notifier is no longer running + * concurrently and the queues are actually stopped + */ + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + mutex_lock(&mem->process_info->notifier_lock); + is_invalid_userptr = !!mem->invalid; + mutex_unlock(&mem->process_info->notifier_lock); + } + mutex_lock(&mem->lock); domain = mem->domain; @@ -2204,7 +2242,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, ret = drm_vma_node_allow(&obj->vma_node, drm_priv); if (ret) { - kfree(mem); + kfree(*mem); return ret; } @@ -2241,34 +2279,38 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it * cannot do any memory allocations, and cannot take any locks that - * are held elsewhere while allocating memory. Therefore this is as - * simple as possible, using atomic counters. + * are held elsewhere while allocating memory. * * It doesn't do anything to the BO itself. The real work happens in * restore, where we get updated page addresses. This function only * ensures that GPU access to the BO is stopped. */ -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, - struct mm_struct *mm) +int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, + unsigned long cur_seq, struct kgd_mem *mem) { struct amdkfd_process_info *process_info = mem->process_info; - int evicted_bos; int r = 0; - /* Do not process MMU notifications until stage-4 IOCTL is received */ + /* Do not process MMU notifications during CRIU restore until + * KFD_CRIU_OP_RESUME IOCTL is received + */ if (READ_ONCE(process_info->block_mmu_notifications)) return 0; - atomic_inc(&mem->invalid); - evicted_bos = atomic_inc_return(&process_info->evicted_bos); - if (evicted_bos == 1) { + mutex_lock(&process_info->notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + mem->invalid++; + if (++process_info->evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); + r = kgd2kfd_quiesce_mm(mni->mm, + KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); } + mutex_unlock(&process_info->notifier_lock); return r; } @@ -2285,54 +2327,58 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, struct kgd_mem *mem, *tmp_mem; struct amdgpu_bo *bo; struct ttm_operation_ctx ctx = { false, false }; - int invalid, ret; + uint32_t invalid; + int ret = 0; - /* Move all invalidated BOs to the userptr_inval_list and - * release their user pages by migration to the CPU domain - */ + mutex_lock(&process_info->notifier_lock); + + /* Move all invalidated BOs to the userptr_inval_list */ list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_valid_list, - validate_list.head) { - if (!atomic_read(&mem->invalid)) - continue; /* BO is still valid */ - - bo = mem->bo; - - if (amdgpu_bo_reserve(bo, true)) - return -EAGAIN; - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - amdgpu_bo_unreserve(bo); - if (ret) { - pr_err("%s: Failed to invalidate userptr BO\n", - __func__); - return -EAGAIN; - } - - list_move_tail(&mem->validate_list.head, - &process_info->userptr_inval_list); - } - - if (list_empty(&process_info->userptr_inval_list)) - return 0; /* All evicted userptr BOs were freed */ + validate_list.head) + if (mem->invalid) + list_move_tail(&mem->validate_list.head, + &process_info->userptr_inval_list); /* Go through userptr_inval_list and update any invalid user_pages */ list_for_each_entry(mem, &process_info->userptr_inval_list, validate_list.head) { - struct hmm_range *range; - - invalid = atomic_read(&mem->invalid); + invalid = mem->invalid; if (!invalid) /* BO hasn't been invalidated since the last - * revalidation attempt. Keep its BO list. + * revalidation attempt. Keep its page list. */ continue; bo = mem->bo; + amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); + mem->range = NULL; + + /* BO reservations and getting user pages (hmm_range_fault) + * must happen outside the notifier lock + */ + mutex_unlock(&process_info->notifier_lock); + + /* Move the BO to system (CPU) domain if necessary to unmap + * and free the SG table + */ + if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) { + if (amdgpu_bo_reserve(bo, true)) + return -EAGAIN; + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + amdgpu_bo_unreserve(bo); + if (ret) { + pr_err("%s: Failed to invalidate userptr BO\n", + __func__); + return -EAGAIN; + } + } + /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &range); + &mem->range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2345,30 +2391,32 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, */ if (ret != -EFAULT) return ret; - } else { - /* - * FIXME: Cannot ignore the return code, must hold - * notifier_lock - */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); + ret = 0; } + mutex_lock(&process_info->notifier_lock); + /* Mark the BO as valid unless it was invalidated * again concurrently. */ - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) - return -EAGAIN; + if (mem->invalid != invalid) { + ret = -EAGAIN; + goto unlock_out; + } + mem->invalid = 0; } - return 0; +unlock_out: + mutex_unlock(&process_info->notifier_lock); + + return ret; } /* Validate invalid userptr BOs * - * Validates BOs on the userptr_inval_list, and moves them back to the - * userptr_valid_list. Also updates GPUVM page tables with new page - * addresses and waits for the page table updates to complete. + * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables + * with new page addresses and waits for the page table updates to complete. */ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) { @@ -2439,9 +2487,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - list_move_tail(&mem->validate_list.head, - &process_info->userptr_valid_list); - /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -2457,7 +2502,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ - atomic_inc(&mem->invalid); + mutex_lock(&process_info->notifier_lock); + mem->invalid++; + mutex_unlock(&process_info->notifier_lock); goto unreserve_out; } } @@ -2477,6 +2524,36 @@ out_no_mem: return ret; } +/* Confirm that all user pages are valid while holding the notifier lock + * + * Moves valid BOs from the userptr_inval_list back to userptr_val_list. + */ +static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info) +{ + struct kgd_mem *mem, *tmp_mem; + int ret = 0; + + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bool valid = amdgpu_ttm_tt_get_user_pages_done( + mem->bo->tbo.ttm, mem->range); + + mem->range = NULL; + if (!valid) { + WARN(!mem->invalid, "Invalid BO not marked invalid"); + ret = -EAGAIN; + continue; + } + WARN(mem->invalid, "Valid BO is marked invalid"); + + list_move_tail(&mem->validate_list.head, + &process_info->userptr_valid_list); + } + + return ret; +} + /* Worker callback to restore evicted userptr BOs * * Tries to update and validate all userptr BOs. If successful and no @@ -2491,9 +2568,11 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) restore_userptr_work); struct task_struct *usertask; struct mm_struct *mm; - int evicted_bos; + uint32_t evicted_bos; - evicted_bos = atomic_read(&process_info->evicted_bos); + mutex_lock(&process_info->notifier_lock); + evicted_bos = process_info->evicted_bos; + mutex_unlock(&process_info->notifier_lock); if (!evicted_bos) return; @@ -2516,9 +2595,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * and we can just restart the queues. */ if (!list_empty(&process_info->userptr_inval_list)) { - if (atomic_read(&process_info->evicted_bos) != evicted_bos) - goto unlock_out; /* Concurrent eviction, try again */ - if (validate_invalid_user_pages(process_info)) goto unlock_out; } @@ -2527,10 +2603,17 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * be a first eviction that calls quiesce_mm. The eviction * reference counting inside KFD will handle this case. */ - if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != - evicted_bos) - goto unlock_out; - evicted_bos = 0; + mutex_lock(&process_info->notifier_lock); + if (process_info->evicted_bos != evicted_bos) + goto unlock_notifier_out; + + if (confirm_valid_user_pages_locked(process_info)) { + WARN(1, "User pages unexpectedly invalid"); + goto unlock_notifier_out; + } + + process_info->evicted_bos = evicted_bos = 0; + if (kgd2kfd_resume_mm(mm)) { pr_err("%s: Failed to resume KFD\n", __func__); /* No recovery from this failure. Probably the CP is @@ -2538,6 +2621,8 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) */ } +unlock_notifier_out: + mutex_unlock(&process_info->notifier_lock); unlock_out: mutex_unlock(&process_info->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 9b97fa39d47a..ac6fe0ae4609 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -104,7 +104,7 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes) { - uint32_t start_addr, fw_size, drv_size; + u32 start_addr, fw_size, drv_size; start_addr = le32_to_cpu(fw_usage->start_address_in_kb); fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); @@ -116,7 +116,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, drv_size); if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == - (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { /* Firmware request VRAM reservation for SR-IOV */ adev->mman.fw_vram_usage_start_offset = (start_addr & @@ -133,7 +133,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev, struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes) { - uint32_t fw_start_addr, fw_size, drv_start_addr, drv_size; + u32 fw_start_addr, fw_size, drv_start_addr, drv_size; fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb); fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); @@ -147,14 +147,18 @@ static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev, drv_start_addr, drv_size); - if ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) { + if (amdgpu_sriov_vf(adev) && + ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { /* Firmware request VRAM reservation for SR-IOV */ adev->mman.fw_vram_usage_start_offset = (fw_start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; adev->mman.fw_vram_usage_size = fw_size << 10; } - if ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) { + if (amdgpu_sriov_vf(adev) && + ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { /* driver request VRAM reservation for SR-IOV */ adev->mman.drv_vram_usage_start_offset = (drv_start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; @@ -172,8 +176,8 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) vram_usagebyfirmware); struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1; struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2; - uint16_t data_offset; - uint8_t frev, crev; + u16 data_offset; + u8 frev, crev; int usage_bytes = 0; if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index e363f56c72af..30c28a69e847 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (!found) return false; + pci_dev_put(pdev); adev->bios = kmalloc(size, GFP_KERNEL); if (!adev->bios) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index e1320edfc527..2ebbc6382a06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -327,7 +327,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; - drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 459150460815..8516c814bc9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, return r; ++(num_ibs[r]); + p->gang_leader_idx = r; return 0; } @@ -298,7 +299,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (ret) goto free_all_kdata; } - p->gang_leader = p->jobs[p->gang_size - 1]; + p->gang_leader = p->jobs[p->gang_leader_idx]; if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { ret = -ECANCELED; @@ -1210,10 +1211,9 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); if (r && r != -ERESTARTSYS) DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; } @@ -1247,9 +1247,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, for (i = 0; i < p->gang_size; ++i) drm_sched_job_arm(&p->jobs[i]->base); - for (i = 0; i < (p->gang_size - 1); ++i) { + for (i = 0; i < p->gang_size; ++i) { struct dma_fence *fence; + if (p->jobs[i] == leader) + continue; + fence = &p->jobs[i]->base.s_fence->scheduled; r = drm_sched_job_add_dependency(&leader->base, fence); if (r) @@ -1286,7 +1289,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, list_for_each_entry(e, &p->validated, tv.head) { /* Everybody except for the gang leader uses READ */ - for (i = 0; i < (p->gang_size - 1); ++i) { + for (i = 0; i < p->gang_size; ++i) { + if (p->jobs[i] == leader) + continue; + dma_resv_add_fence(e->tv.bo->base.resv, &p->jobs[i]->base.s_fence->finished, DMA_RESV_USAGE_READ); @@ -1296,7 +1302,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, e->tv.num_shared = 0; } - seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], + seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], p->fence); amdgpu_cs_post_dependencies(p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index 207e801c24ed..113f39510a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -54,6 +54,7 @@ struct amdgpu_cs_parser { /* scheduler job objects */ unsigned int gang_size; + unsigned int gang_leader_idx; struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *gang_leader; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b2b1c66bfe39..afe6af9c0138 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2473,6 +2473,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (WARN_ON(!hive)) { + r = -ENOENT; + goto init_failed; + } + if (!hive->reset_domain || !amdgpu_reset_get_reset_domain(hive->reset_domain)) { r = -ENOENT; @@ -3011,14 +3016,15 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) continue; } - /* skip suspend of gfx and psp for S0ix + /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware * so no need to suspend it. */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)) + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) continue; /* XXX handle errors */ @@ -4107,6 +4113,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) adev->in_suspend = true; + /* Evict the majority of BOs before grabbing the full access */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); @@ -4181,21 +4192,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) r = amdgpu_device_ip_resume(adev); - /* no matter what r is, always need to properly release full GPU */ - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_init_data_exchange(adev); - amdgpu_virt_release_full_gpu(adev, true); - } - if (r) { dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); - return r; + goto exit; } amdgpu_fence_driver_hw_init(adev); r = amdgpu_device_ip_late_init(adev); if (r) - return r; + goto exit; queue_delayed_work(system_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); @@ -4203,12 +4208,19 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) - return r; + goto exit; + } + +exit: + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_data_exchange(adev); + amdgpu_virt_release_full_gpu(adev, true); } + if (r) + return r; + /* Make sure IB tests flushed */ - if (amdgpu_sriov_vf(adev)) - amdgpu_irq_gpu_reset_resume_helper(adev); flush_delayed_work(&adev->delayed_init_work); if (adev->in_s0ix) { @@ -5045,6 +5057,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) pm_runtime_enable(&(p->dev)); pm_runtime_resume(&(p->dev)); } + + pci_dev_put(p); } static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) @@ -5083,6 +5097,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) if (expires < ktime_get_mono_fast_ns()) { dev_warn(adev->dev, "failed to suspend display audio\n"); + pci_dev_put(p); /* TODO: abort the succeeding gpu reset? */ return -ETIMEDOUT; } @@ -5090,6 +5105,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) pm_runtime_disable(&(p->dev)); + pci_dev_put(p); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 6b48178455bc..1bbd56029a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1512,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: @@ -1556,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: @@ -1641,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): @@ -1691,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; default: @@ -1702,11 +1706,13 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) return 0; } +#if defined(CONFIG_DRM_AMD_DC) static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev) { amdgpu_device_set_sriov_virtual_display(adev); amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); } +#endif static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) { @@ -1802,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: @@ -1965,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2195,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->family = AMDGPU_FAMILY_GC_11_0_0; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->family = AMDGPU_FAMILY_GC_11_0_1; break; default: @@ -2212,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 7): case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->flags |= AMD_IS_APU; break; default: @@ -2268,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg; break; case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5697d456d7d0..b4f2d61ea0d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -231,17 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); /** * DOC: gartsize (uint) - * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic). + * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing. + * The default is -1 (The size depends on asic). */ -MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); +MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)"); module_param_named(gartsize, amdgpu_gart_size, uint, 0600); /** * DOC: gttsize (int) - * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM, - * otherwise 3/4 RAM size). + * Restrict the size of GTT domain (for userspace use) in MiB for testing. + * The default is -1 (Use 1/2 RAM, minimum value is 3GB). */ -MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); +MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)"); module_param_named(gttsize, amdgpu_gtt_size, int, 0600); /** @@ -2038,6 +2039,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, "See modparam exp_hw_support\n"); return -ENODEV; } + /* differentiate between P10 and P11 asics with the same DID */ + if (pdev->device == 0x67FF && + (pdev->revision == 0xE3 || + pdev->revision == 0xE7 || + pdev->revision == 0xF3 || + pdev->revision == 0xF7)) { + flags &= ~AMD_ASIC_MASK; + flags |= CHIP_POLARIS10; + } /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, * however, SME requires an indirect IOMMU mapping because the encryption @@ -2107,12 +2117,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); - ret = amdgpu_driver_load_kms(adev, ent->driver_data); + ret = amdgpu_driver_load_kms(adev, flags); if (ret) goto err_pci; retry_init: - ret = drm_dev_register(ddev, ent->driver_data); + ret = drm_dev_register(ddev, flags); if (ret == -EAGAIN && ++retry <= 3) { DRM_INFO("retry init %d\n", retry); /* Don't request EX mode too frequently which is attacking */ @@ -2570,6 +2580,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) amdgpu_device_baco_enter(drm_dev); } + dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d0d99ed607dd..00444203220d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -55,6 +55,7 @@ struct amdgpu_fence { /* RB, DMA, etc. */ struct amdgpu_ring *ring; + ktime_t start_timestamp; }; static struct kmem_cache *amdgpu_fence_slab; @@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd } } + to_amdgpu_fence(fence)->start_timestamp = ktime_get(); + /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. */ @@ -407,6 +410,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) } /** + * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now + * @ring: ring the fence is associated with + * + * Find the earliest fence unsignaled until now, calculate the time delta + * between the time fence emitted and now. + */ +u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct dma_fence *fence; + uint32_t last_seq, sync_seq; + + last_seq = atomic_read(&ring->fence_drv.last_seq); + sync_seq = READ_ONCE(ring->fence_drv.sync_seq); + if (last_seq == sync_seq) + return 0; + + ++last_seq; + last_seq &= drv->num_fences_mask; + fence = drv->fences[last_seq]; + if (!fence) + return 0; + + return ktime_us_delta(ktime_get(), + to_amdgpu_fence(fence)->start_timestamp); +} + +/** + * amdgpu_fence_update_start_timestamp - update the timestamp of the fence + * @ring: ring the fence is associated with + * @seq: the fence seq number to update. + * @timestamp: the start timestamp to update. + * + * The function called at the time the fence and related ib is about to + * resubmit to gpu in MCBP scenario. Thus we do not consider race condition + * with amdgpu_fence_process to modify the same fence. + */ +void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct dma_fence *fence; + + seq &= drv->num_fences_mask; + fence = drv->fences[seq]; + if (!fence) + return; + + to_amdgpu_fence(fence)->start_timestamp = timestamp; +} + +/** * amdgpu_fence_driver_start_ring - make the fence driver * ready for use on the requested ring. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 2c38ac7bc643..4620c4712ce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -64,7 +64,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) sizeof(atom_ctx->vbios_version)) || strnstr(atom_ctx->vbios_version, "D163", sizeof(atom_ctx->vbios_version))) { - *fru_addr = FRU_EEPROM_MADDR_6; + if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_6; return true; } else { return false; @@ -83,7 +84,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) sizeof(atom_ctx->vbios_version))) { return false; } else { - *fru_addr = FRU_EEPROM_MADDR_6; + if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_6; return true; } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a0780a4e3e61..bb7350ea1d75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -256,7 +256,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str * becoming writable and makes is_cow_mapping(vm_flags) false. */ if (is_cow_mapping(vma->vm_flags) && - !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + !(vma->vm_flags & VM_ACCESS_FLAGS)) vma->vm_flags &= ~VM_MAYWRITE; return drm_gem_ttm_mmap(obj, vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 832b3807f1d6..b3df4787877e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -33,6 +33,7 @@ #include "amdgpu_imu.h" #include "soc15.h" #include "amdgpu_ras.h" +#include "amdgpu_ring_mux.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -352,6 +353,9 @@ struct amdgpu_gfx { struct amdgpu_gfx_ras *ras; bool is_poweron; + + struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; + struct amdgpu_ring_mux muxer; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 4365ede42855..02a4c93673ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned i; unsigned vmhub, inv_eng; + if (adev->enable_mes) { + /* reserve engine 5 for firmware */ + for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) + vm_inv_engs[vmhub] &= ~(1 << 5); + } + for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; vmhub = ring->funcs->vmhub; @@ -543,6 +549,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index a48ea62b12b0..2dadcfe43d03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -51,6 +51,8 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_hmm.h" +#define MAX_WALK_BYTE (2UL << 30) + /** * amdgpu_hmm_invalidate_gfx - callback to notify about mm change * @@ -103,17 +105,11 @@ static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni, unsigned long cur_seq) { struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); if (!mmu_notifier_range_blockable(range)) return false; - mutex_lock(&adev->notifier_lock); - - mmu_interval_set_seq(mni, cur_seq); - - amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); - mutex_unlock(&adev->notifier_lock); + amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo); return true; } @@ -163,6 +159,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, struct hmm_range **phmm_range) { struct hmm_range *hmm_range; + unsigned long end; unsigned long timeout; unsigned long i; unsigned long *pfns; @@ -184,25 +181,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, hmm_range->default_flags |= HMM_PFN_REQ_WRITE; hmm_range->hmm_pfns = pfns; hmm_range->start = start; - hmm_range->end = start + npages * PAGE_SIZE; + end = start + npages * PAGE_SIZE; hmm_range->dev_private_owner = owner; - /* Assuming 512MB takes maxmium 1 second to fault page address */ - timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; - timeout = jiffies + msecs_to_jiffies(timeout); + do { + hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end); + + pr_debug("hmm range: start = 0x%lx, end = 0x%lx", + hmm_range->start, hmm_range->end); + + /* Assuming 512MB takes maxmium 1 second to fault page address */ + timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL); + timeout *= HMM_RANGE_DEFAULT_TIMEOUT; + timeout = jiffies + msecs_to_jiffies(timeout); retry: - hmm_range->notifier_seq = mmu_interval_read_begin(notifier); - r = hmm_range_fault(hmm_range); - if (unlikely(r)) { - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. - */ - if (r == -EBUSY && !time_after(jiffies, timeout)) - goto retry; - goto out_free_pfns; - } + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); + r = hmm_range_fault(hmm_range); + if (unlikely(r)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if (r == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } + + if (hmm_range->end == end) + break; + hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; + hmm_range->start = hmm_range->end; + schedule(); + } while (hmm_range->end < end); + + hmm_range->start = start; + hmm_range->hmm_pfns = pfns; /* * Due to default_flags, all pages are HMM_PFN_VALID or @@ -224,9 +238,9 @@ out_free_range: return r; } -int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) { - int r; + bool r; r = mmu_interval_read_retry(hmm_range->notifier, hmm_range->notifier_seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index 13ed94d3b01b..e2edcd010ccc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -29,12 +29,13 @@ #include <linux/rwsem.h> #include <linux/workqueue.h> #include <linux/interval_tree.h> +#include <linux/mmu_notifier.h> int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, struct page **pages, struct hmm_range **phmm_range); -int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); +bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); #if defined(CONFIG_HMM_MIRROR) int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 774c77bb8f4e..bcccc348dbe2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } } + amdgpu_ring_ib_begin(ring); if (job && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->funcs->emit_wave_limit(ring, false); + amdgpu_ring_ib_end(ring); amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 2a9a2593dc18..fcb711a11a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -165,6 +165,26 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } +/* Check if we need to switch to another set of resources */ +static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id, + struct amdgpu_job *job) +{ + return id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size; +} + +/* Check if the id is compatible with the job */ +static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, + struct amdgpu_job *job) +{ + return id->pd_gpu_addr == job->vm_pd_addr && + !amdgpu_vmid_gds_switch_needed(id, job); +} + /** * amdgpu_vmid_grab_idle - grab idle VMID * @@ -258,14 +278,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; - *id = vm->reserved_vmid[vmhub]; + *id = id_mgr->reserved; if ((*id)->owner != vm->immediate.fence_context || - (*id)->pd_gpu_addr != job->vm_pd_addr || + !amdgpu_vmid_compatible(*id, job) || (*id)->flushed_updates < updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -294,8 +315,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, if (r) return r; - (*id)->flushed_updates = updates; job->vm_needs_flush = needs_flush; + job->spm_update_needed = true; return 0; } @@ -333,7 +354,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if ((*id)->owner != vm->immediate.fence_context) continue; - if ((*id)->pd_gpu_addr != job->vm_pd_addr) + if (!amdgpu_vmid_compatible(*id, job)) continue; if (!(*id)->last_flush || @@ -355,7 +376,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (r) return r; - (*id)->flushed_updates = updates; job->vm_needs_flush |= needs_flush; return 0; } @@ -408,22 +428,30 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - id->flushed_updates = amdgpu_vm_tlb_seq(vm); job->vm_needs_flush = true; } list_move_tail(&id->list, &id_mgr->ids_lru); } - id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->immediate.fence_context; - + job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job); if (job->vm_needs_flush) { + id->flushed_updates = amdgpu_vm_tlb_seq(vm); dma_fence_put(id->last_flush); id->last_flush = NULL; } job->vmid = id - id_mgr->ids; job->pasid = vm->pasid; + + id->gds_base = job->gds_base; + id->gds_size = job->gds_size; + id->gws_base = job->gws_base; + id->gws_size = job->gws_size; + id->oa_base = job->oa_base; + id->oa_size = job->oa_size; + id->pd_gpu_addr = job->vm_pd_addr; + id->owner = vm->immediate.fence_context; + trace_amdgpu_vm_grab_id(vm, ring, job); error: @@ -435,31 +463,27 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { - struct amdgpu_vmid_mgr *id_mgr; - struct amdgpu_vmid *idle; - int r = 0; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); if (vm->reserved_vmid[vmhub]) goto unlock; - if (atomic_inc_return(&id_mgr->reserved_vmid_num) > - AMDGPU_VM_MAX_RESERVED_VMID) { - DRM_ERROR("Over limitation of reserved vmid\n"); - atomic_dec(&id_mgr->reserved_vmid_num); - r = -EINVAL; - goto unlock; + + ++id_mgr->reserved_use_count; + if (!id_mgr->reserved) { + struct amdgpu_vmid *id; + + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, + list); + /* Remove from normal round robin handling */ + list_del_init(&id->list); + id_mgr->reserved = id; } - /* Select the first entry VMID */ - idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); - list_del_init(&idle->list); - vm->reserved_vmid[vmhub] = idle; - mutex_unlock(&id_mgr->lock); + vm->reserved_vmid[vmhub] = true; - return 0; unlock: mutex_unlock(&id_mgr->lock); - return r; + return 0; } void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, @@ -469,12 +493,12 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) { - list_add(&vm->reserved_vmid[vmhub]->list, - &id_mgr->ids_lru); - vm->reserved_vmid[vmhub] = NULL; - atomic_dec(&id_mgr->reserved_vmid_num); + if (vm->reserved_vmid[vmhub] && + !--id_mgr->reserved_use_count) { + /* give the reserved ID back to normal round robin */ + list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); } + vm->reserved_vmid[vmhub] = false; mutex_unlock(&id_mgr->lock); } @@ -541,7 +565,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); - atomic_set(&id_mgr->reserved_vmid_num, 0); + id_mgr->reserved_use_count = 0; /* manage only VMIDs not used by KFD */ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 57efe61dceed..d1cc09b45da4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -67,7 +67,8 @@ struct amdgpu_vmid_mgr { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; - atomic_t reserved_vmid_num; + struct amdgpu_vmid *reserved; + unsigned int reserved_use_count; }; int amdgpu_pasid_alloc(unsigned int bits); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 032651a655f0..9e549923622b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -174,7 +174,12 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) drm_sched_job_cleanup(s_job); amdgpu_sync_free(&job->explicit_sync); - dma_fence_put(&job->hw_fence); + + /* only put the hw fence if has embedded fence */ + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -245,15 +250,15 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + while (!fence && job->vm && !job->vmid) { r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); } - if (!fence && job->gang_submit) - fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); - return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index a372802ea4e0..52f2e313ea17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -53,6 +53,8 @@ struct amdgpu_job { uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; + bool gds_switch_needed; + bool spm_update_needed; uint64_t vm_pd_addr; unsigned vmid; unsigned pasid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index ba348046fcec..7aa7e52ca784 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -431,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) + if (adev->vcn.harvest_config & (1 << i)) continue; if (adev->vcn.inst[i].ring_dec.sched.ready) @@ -443,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) + if (adev->vcn.harvest_config & (1 << i)) continue; for (j = 0; j < adev->vcn.num_enc_rings; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 974e85d8b6cc..4e684c2afc70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -346,17 +346,16 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, * @adev: amdgpu device object * @offset: offset of the BO * @size: size of the BO - * @domain: where to place it * @bo_ptr: used to initialize BOs in structures * @cpu_addr: optional CPU address mapping * - * Creates a kernel BO at a specific offset in the address space of the domain. + * Creates a kernel BO at a specific offset in VRAM. * * Returns: * 0 on success, negative error code otherwise. */ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, uint32_t domain, + uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr, void **cpu_addr) { struct ttm_operation_ctx ctx = { false, false }; @@ -366,8 +365,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, offset &= PAGE_MASK; size = ALIGN(size, PAGE_SIZE); - r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, - NULL, cpu_addr); + r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL, + cpu_addr); if (r) return r; @@ -422,6 +422,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (*bo == NULL) return; + WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend); + if (likely(amdgpu_bo_reserve(*bo, true) == 0)) { if (cpu_addr) amdgpu_bo_kunmap(*bo); @@ -446,27 +448,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, /* * If GTT is part of requested domains the check must succeed to - * allow fall back to GTT + * allow fall back to GTT. */ if (domain & AMDGPU_GEM_DOMAIN_GTT) { man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - if (size < man->size) + if (man && size < man->size) return true; - else - goto fail; - } - - if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (!man) + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); + goto fail; + } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - if (size < man->size) + if (man && size < man->size) return true; - else - goto fail; + goto fail; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ return true; @@ -1510,7 +1509,8 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain) { - if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) && + ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) { domain = AMDGPU_GEM_DOMAIN_VRAM; if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) domain = AMDGPU_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 147b79c10cbb..93207badf83f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, uint32_t domain, + uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr, void **cpu_addr); int amdgpu_bo_create_user(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 98dbf4e5aae9..7a2fc920739b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -165,6 +165,7 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -198,6 +199,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); + mem_ctx->shared_bo = NULL; } static void psp_free_shared_bufs(struct psp_context *psp) @@ -208,6 +210,7 @@ static void psp_free_shared_bufs(struct psp_context *psp) /* free TMR memory buffer */ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + psp->tmr_bo = NULL; /* free xgmi shared memory */ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); @@ -510,26 +513,22 @@ static int psp_sw_fini(void *handle) struct psp_gfx_cmd_resp *cmd = psp->cmd; psp_memory_training_fini(psp); - if (psp->sos_fw) { - release_firmware(psp->sos_fw); - psp->sos_fw = NULL; - } - if (psp->asd_fw) { - release_firmware(psp->asd_fw); - psp->asd_fw = NULL; - } - if (psp->ta_fw) { - release_firmware(psp->ta_fw); - psp->ta_fw = NULL; - } - if (psp->cap_fw) { - release_firmware(psp->cap_fw); - psp->cap_fw = NULL; - } - if (psp->toc_fw) { - release_firmware(psp->toc_fw); - psp->toc_fw = NULL; - } + + release_firmware(psp->sos_fw); + psp->sos_fw = NULL; + + release_firmware(psp->asd_fw); + psp->asd_fw = NULL; + + release_firmware(psp->ta_fw); + psp->ta_fw = NULL; + + release_firmware(psp->cap_fw); + psp->cap_fw = NULL; + + release_firmware(psp->toc_fw); + psp->toc_fw = NULL; + if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) psp_sysfs_fini(adev); @@ -769,7 +768,7 @@ static int psp_load_toc(struct psp_context *psp, /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { - int ret; + int ret = 0; int tmr_size; void *tmr_buf; void **pptr; @@ -796,10 +795,12 @@ static int psp_tmr_init(struct psp_context *psp) } } - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + if (!psp->tmr_bo) { + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + } return ret; } @@ -857,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp) struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_unload_cmd_buf(psp, cmd); - dev_info(psp->adev->dev, "free PSP TMR buffer\n"); + dev_dbg(psp->adev->dev, "free PSP TMR buffer\n"); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -2727,8 +2728,6 @@ static int psp_suspend(void *handle) } out: - psp_free_shared_bufs(psp); - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 077404a9c935..ad490c1e2f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct amdgpu_ras *con = container_of(attr, struct amdgpu_ras, features_attr); - return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); + return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); } static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d3558c34d406..dc474b809604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring) return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop); } + +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_begin(ring); +} + +void amdgpu_ring_ib_end(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_end(ring); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 82c178a9033a..f752c7ae7f60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -39,6 +39,7 @@ struct amdgpu_vm; #define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_HWIP_RINGS 8 #define AMDGPU_MAX_GFX_RINGS 2 +#define AMDGPU_MAX_SW_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 @@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) +#define AMDGPU_FENCE_FLAG_EXEC (1 << 3) #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) @@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); + void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); +u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring); +void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, + ktime_t timestamp); + /* * Rings. */ @@ -279,6 +286,10 @@ struct amdgpu_ring { bool is_mes_queue; uint32_t hw_queue_id; struct amdgpu_mes_ctx_data *mes_ctx; + + bool is_sw_ring; + unsigned int entry_index; + }; #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) @@ -307,6 +318,9 @@ struct amdgpu_ring { #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); +void amdgpu_ring_ib_end(struct amdgpu_ring *ring); + void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c new file mode 100644 index 000000000000..62079f0e3ee8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -0,0 +1,514 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/slab.h> +#include <drm/drm_print.h> + +#include "amdgpu_ring_mux.h" +#include "amdgpu_ring.h" +#include "amdgpu.h" + +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2) +#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000 + +static const struct ring_info { + unsigned int hw_pio; + const char *ring_name; +} sw_ring_info[] = { + { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"}, + { AMDGPU_RING_PRIO_2, "gfx_high"}, +}; + +static struct kmem_cache *amdgpu_mux_chunk_slab; + +static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux, + struct amdgpu_ring *ring) +{ + return ring->entry_index < mux->ring_entry_size ? + &mux->ring_entry[ring->entry_index] : NULL; +} + +/* copy packages on sw ring range[begin, end) */ +static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, + struct amdgpu_ring *ring, + u64 s_start, u64 s_end) +{ + u64 start, end; + struct amdgpu_ring *real_ring = mux->real_ring; + + start = s_start & ring->buf_mask; + end = s_end & ring->buf_mask; + + if (start == end) { + DRM_ERROR("no more data copied from sw ring\n"); + return; + } + if (start > end) { + amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start); + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], + (ring->ring_size >> 2) - start); + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end); + } else { + amdgpu_ring_alloc(real_ring, end - start); + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start); + } +} + +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_mux_entry *e = NULL; + struct amdgpu_mux_chunk *chunk; + uint32_t seq, last_seq; + int i; + + /*find low priority entries:*/ + if (!mux->s_resubmit) + return; + + for (i = 0; i < mux->num_ring_entries; i++) { + if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { + e = &mux->ring_entry[i]; + break; + } + } + + if (!e) { + DRM_ERROR("%s no low priority ring found\n", __func__); + return; + } + + last_seq = atomic_read(&e->ring->fence_drv.last_seq); + seq = mux->seqno_to_resubmit; + if (last_seq < seq) { + /*resubmit all the fences between (last_seq, seq]*/ + list_for_each_entry(chunk, &e->list, entry) { + if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) { + amdgpu_fence_update_start_timestamp(e->ring, + chunk->sync_seq, + ktime_get()); + amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, + chunk->start, + chunk->end); + mux->wptr_resubmit = chunk->end; + amdgpu_ring_commit(mux->real_ring); + } + } + } + + del_timer(&mux->resubmit_timer); + mux->s_resubmit = false; +} + +static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux) +{ + mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT); +} + +static void amdgpu_mux_resubmit_fallback(struct timer_list *t) +{ + struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer); + + if (!spin_trylock(&mux->lock)) { + amdgpu_ring_mux_schedule_resubmit(mux); + DRM_ERROR("reschedule resubmit\n"); + return; + } + amdgpu_mux_resubmit_chunks(mux); + spin_unlock(&mux->lock); +} + +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, + unsigned int entry_size) +{ + mux->real_ring = ring; + mux->num_ring_entries = 0; + + mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL); + if (!mux->ring_entry) + return -ENOMEM; + + mux->ring_entry_size = entry_size; + mux->s_resubmit = false; + + amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", + sizeof(struct amdgpu_mux_chunk), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!amdgpu_mux_chunk_slab) { + DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); + return -ENOMEM; + } + + spin_lock_init(&mux->lock); + timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0); + + return 0; +} + +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk, *chunk2; + int i; + + for (i = 0; i < mux->num_ring_entries; i++) { + e = &mux->ring_entry[i]; + list_for_each_entry_safe(chunk, chunk2, &e->list, entry) { + list_del(&chunk->entry); + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); + } + } + kmem_cache_destroy(amdgpu_mux_chunk_slab); + kfree(mux->ring_entry); + mux->ring_entry = NULL; + mux->num_ring_entries = 0; + mux->ring_entry_size = 0; +} + +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + + if (mux->num_ring_entries >= mux->ring_entry_size) { + DRM_ERROR("add sw ring exceeding max entry size\n"); + return -ENOENT; + } + + e = &mux->ring_entry[mux->num_ring_entries]; + ring->entry_index = mux->num_ring_entries; + e->ring = ring; + + INIT_LIST_HEAD(&e->list); + mux->num_ring_entries += 1; + return 0; +} + +void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr) +{ + struct amdgpu_mux_entry *e; + + spin_lock(&mux->lock); + + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) + amdgpu_mux_resubmit_chunks(mux); + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry for sw ring\n"); + spin_unlock(&mux->lock); + return; + } + + /* We could skip this set wptr as preemption in process. */ + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) { + spin_unlock(&mux->lock); + return; + } + + e->sw_cptr = e->sw_wptr; + /* Update cptr if the package already copied in resubmit functions */ + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit) + e->sw_cptr = mux->wptr_resubmit; + e->sw_wptr = wptr; + e->start_ptr_in_hw_ring = mux->real_ring->wptr; + + /* Skip copying for the packages already resubmitted.*/ + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) { + amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr); + e->end_ptr_in_hw_ring = mux->real_ring->wptr; + amdgpu_ring_commit(mux->real_ring); + } else { + e->end_ptr_in_hw_ring = mux->real_ring->wptr; + } + spin_unlock(&mux->lock); +} + +u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry for sw ring\n"); + return 0; + } + + return e->sw_wptr; +} + +/** + * amdgpu_ring_mux_get_rptr - get the readptr of the software ring + * @mux: the multiplexer the software rings attach to + * @ring: the software ring of which we calculate the readptr + * + * The return value of the readptr is not precise while the other rings could + * write data onto the real ring buffer.After overwriting on the real ring, we + * can not decide if our packages have been excuted or not read yet. However, + * this function is only called by the tools such as umr to collect the latest + * packages for the hang analysis. We assume the hang happens near our latest + * submit. Thus we could use the following logic to give the clue: + * If the readptr is between start and end, then we return the copy pointer + * plus the distance from start to readptr. If the readptr is before start, we + * return the copy pointer. Lastly, if the readptr is past end, we return the + * write pointer. + */ +u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + u64 readp, offset, start, end; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("no sw entry found!\n"); + return 0; + } + + readp = amdgpu_ring_get_rptr(mux->real_ring); + + start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask; + end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask; + if (start > end) { + if (readp <= end) + readp += mux->real_ring->ring_size >> 2; + end += mux->real_ring->ring_size >> 2; + } + + if (start <= readp && readp <= end) { + offset = readp - start; + e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask; + } else if (readp < start) { + e->sw_rptr = e->sw_cptr; + } else { + /* end < readptr */ + e->sw_rptr = e->sw_wptr; + } + + return e->sw_rptr; +} + +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + return amdgpu_ring_mux_get_rptr(mux, ring); +} + +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + return amdgpu_ring_mux_get_wptr(mux, ring); +} + +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr); +} + +/* Override insert_nop to prevent emitting nops to the software rings */ +void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + WARN_ON(!ring->is_sw_ring); +} + +const char *amdgpu_sw_ring_name(int idx) +{ + return idx < ARRAY_SIZE(sw_ring_info) ? + sw_ring_info[idx].ring_name : NULL; +} + +unsigned int amdgpu_sw_ring_priority(int idx) +{ + return idx < ARRAY_SIZE(sw_ring_info) ? + sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT; +} + +/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/ +static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_ring *ring; + int i, need_preempt; + + need_preempt = 0; + for (i = 0; i < mux->num_ring_entries; i++) { + ring = mux->ring_entry[i].ring; + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && + amdgpu_fence_count_emitted(ring) > 0) + return 0; + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && + amdgpu_fence_last_unsignaled_time_us(ring) > + AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US) + need_preempt = 1; + } + return need_preempt && !mux->s_resubmit; +} + +/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */ +static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux) +{ + int r; + + spin_lock(&mux->lock); + mux->pending_trailing_fence_signaled = true; + r = amdgpu_ring_preempt_ib(mux->real_ring); + spin_unlock(&mux->lock); + return r; +} + +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { + if (amdgpu_mcbp_scan(mux) > 0) + amdgpu_mcbp_trigger_preempt(mux); + return; + } + + amdgpu_ring_mux_start_ib(mux, ring); +} + +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + return; + amdgpu_ring_mux_end_ib(mux, ring); +} + +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk; + + spin_lock(&mux->lock); + amdgpu_mux_resubmit_chunks(mux); + spin_unlock(&mux->lock); + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL); + if (!chunk) { + DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n"); + return; + } + + chunk->start = ring->wptr; + list_add_tail(&chunk->entry, &e->list); +} + +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + uint32_t last_seq = 0; + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk, *tmp; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + last_seq = atomic_read(&ring->fence_drv.last_seq); + + list_for_each_entry_safe(chunk, tmp, &e->list, entry) { + if (chunk->sync_seq <= last_seq) { + list_del(&chunk->entry); + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); + } + } +} + +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); + if (!chunk) { + DRM_ERROR("cannot find chunk!\n"); + return; + } + + chunk->end = ring->wptr; + chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq); + + scan_and_remove_signaled_chunk(mux, ring); +} + +bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_ring *ring = NULL; + int i; + + if (!mux->pending_trailing_fence_signaled) + return false; + + if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr)) + return false; + + for (i = 0; i < mux->num_ring_entries; i++) { + e = &mux->ring_entry[i]; + if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { + ring = e->ring; + break; + } + } + + if (!ring) { + DRM_ERROR("cannot find low priority ring\n"); + return false; + } + + amdgpu_fence_process(ring); + if (amdgpu_fence_count_emitted(ring) > 0) { + mux->s_resubmit = true; + mux->seqno_to_resubmit = ring->fence_drv.sync_seq; + amdgpu_ring_mux_schedule_resubmit(mux); + } + + mux->pending_trailing_fence_signaled = false; + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h new file mode 100644 index 000000000000..4be45fc14954 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h @@ -0,0 +1,103 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_RING_MUX__ +#define __AMDGPU_RING_MUX__ + +#include <linux/timer.h> +#include <linux/spinlock.h> +#include "amdgpu_ring.h" + +struct amdgpu_ring; + +/** + * struct amdgpu_mux_entry - the entry recording software rings copying information. + * @ring: the pointer to the software ring. + * @start_ptr_in_hw_ring: last start location copied to in the hardware ring. + * @end_ptr_in_hw_ring: last end location copied to in the hardware ring. + * @sw_cptr: the position of the copy pointer in the sw ring. + * @sw_rptr: the read pointer in software ring. + * @sw_wptr: the write pointer in software ring. + * @list: list head for amdgpu_mux_chunk + */ +struct amdgpu_mux_entry { + struct amdgpu_ring *ring; + u64 start_ptr_in_hw_ring; + u64 end_ptr_in_hw_ring; + u64 sw_cptr; + u64 sw_rptr; + u64 sw_wptr; + struct list_head list; +}; + +struct amdgpu_ring_mux { + struct amdgpu_ring *real_ring; + + struct amdgpu_mux_entry *ring_entry; + unsigned int num_ring_entries; + unsigned int ring_entry_size; + /*the lock for copy data from different software rings*/ + spinlock_t lock; + bool s_resubmit; + uint32_t seqno_to_resubmit; + u64 wptr_resubmit; + struct timer_list resubmit_timer; + + bool pending_trailing_fence_signaled; +}; + +/** + * struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings. + * @entry: the list entry. + * @sync_seq: the fence seqno related with the saved IB. + * @start:- start location on the software ring. + * @end:- end location on the software ring. + */ +struct amdgpu_mux_chunk { + struct list_head entry; + uint32_t sync_seq; + u64 start; + u64 end; +}; + +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, + unsigned int entry_size); +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr); +u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); + +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); +void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); +const char *amdgpu_sw_ring_name(int idx); +unsigned int amdgpu_sw_ring_priority(int idx); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7b5074e776f4..55e0284b2bdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -695,8 +695,19 @@ out_unlock: return r; } +/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations + */ +void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, + struct hmm_range *range) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + + if (gtt && gtt->userptr && range) + amdgpu_hmm_range_get_pages_done(range); +} + /* - * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change * Check if the pages backing this ttm range have been invalidated * * Returns: true if pages are still valid @@ -714,10 +725,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - /* - * FIXME: Must always hold notifier_lock for this, and must - * not ignore the return code. - */ return !amdgpu_hmm_range_get_pages_done(range); } #endif @@ -1545,7 +1552,7 @@ static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo, NULL, - NULL); + &adev->mman.drv_vram_usage_va); } /** @@ -1569,7 +1576,6 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) return amdgpu_bo_create_kernel_at(adev, adev->mman.fw_vram_usage_start_offset, adev->mman.fw_vram_usage_size, - AMDGPU_GEM_DOMAIN_VRAM, &adev->mman.fw_vram_usage_reserved_bo, &adev->mman.fw_vram_usage_va); } @@ -1583,8 +1589,9 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) */ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) { - uint64_t vram_size = adev->gmc.visible_vram_size; + u64 vram_size = adev->gmc.visible_vram_size; + adev->mman.drv_vram_usage_va = NULL; adev->mman.drv_vram_usage_reserved_bo = NULL; if (adev->mman.drv_vram_usage_size == 0 || @@ -1594,9 +1601,8 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) return amdgpu_bo_create_kernel_at(adev, adev->mman.drv_vram_usage_start_offset, adev->mman.drv_vram_usage_size, - AMDGPU_GEM_DOMAIN_VRAM, &adev->mman.drv_vram_usage_reserved_bo, - NULL); + &adev->mman.drv_vram_usage_va); } /* @@ -1675,7 +1681,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, - AMDGPU_GEM_DOMAIN_VRAM, &ctx->c2p_bo, NULL); if (ret) { @@ -1689,7 +1694,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ret = amdgpu_bo_create_kernel_at(adev, adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, adev->mman.discovery_tmr_size, - AMDGPU_GEM_DOMAIN_VRAM, &adev->mman.discovery_memory, NULL); if (ret) { @@ -1790,21 +1794,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * avoid display artifacts while transitioning between pre-OS * and driver. */ r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, - AMDGPU_GEM_DOMAIN_VRAM, &adev->mman.stolen_vga_memory, NULL); if (r) return r; r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, adev->mman.stolen_extended_size, - AMDGPU_GEM_DOMAIN_VRAM, &adev->mman.stolen_extended_memory, NULL); if (r) return r; r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, adev->mman.stolen_reserved_size, - AMDGPU_GEM_DOMAIN_VRAM, &adev->mman.stolen_reserved_memory, NULL); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b391c8d076ff..e2cd5894afc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -90,6 +90,7 @@ struct amdgpu_mman { u64 drv_vram_usage_start_offset; u64 drv_vram_usage_size; struct amdgpu_bo *drv_vram_usage_reserved_bo; + void *drv_vram_usage_va; /* PAGE_SIZE'd BO for process memory r/w over SDMA. */ struct amdgpu_bo *sdma_access_bo; @@ -158,6 +159,8 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, struct hmm_range **range); +void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, + struct hmm_range *range); bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, struct hmm_range *range); #else @@ -167,6 +170,10 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, { return -EPERM; } +static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, + struct hmm_range *range) +{ +} static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, struct hmm_range *range) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 33f3415096f7..b1622ac9949f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a226a6c48fb7..2994b9db196f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -395,7 +395,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) */ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &bo, NULL)) DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); @@ -428,11 +427,17 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev, struct eeprom_table_record bp; uint64_t retired_page; uint32_t bp_idx, bp_cnt; + void *vram_usage_va = NULL; + + if (adev->mman.fw_vram_usage_va) + vram_usage_va = adev->mman.fw_vram_usage_va; + else + vram_usage_va = adev->mman.drv_vram_usage_va; if (bp_block_size) { bp_cnt = bp_block_size / sizeof(uint64_t); for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) { - retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va + + retired_page = *(uint64_t *)(vram_usage_va + bp_block_offset + bp_idx * sizeof(uint64_t)); bp.retired_page = retired_page; @@ -643,7 +648,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; - if (adev->mman.fw_vram_usage_va != NULL) { + if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { + DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); + } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { /* go through this logic in ip_init and reset to init workqueue*/ amdgpu_virt_exchange_data(adev); @@ -666,32 +673,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) uint32_t bp_block_size = 0; struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; - if (adev->mman.fw_vram_usage_va != NULL) { - - adev->virt.fw_reserve.p_pf2vf = - (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); - adev->virt.fw_reserve.p_vf2pf = - (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { + if (adev->mman.fw_vram_usage_va) { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + } else if (adev->mman.drv_vram_usage_va) { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + } amdgpu_virt_read_pf2vf_data(adev); amdgpu_virt_write_vf2pf_data(adev); /* bad page handling for version 2 */ if (adev->virt.fw_reserve.p_pf2vf->version == 2) { - pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; + pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; - bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | - ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); - bp_block_size = pf2vf_v2->bp_block_size; + bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | + ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); + bp_block_size = pf2vf_v2->bp_block_size; - if (bp_block_size && !adev->virt.ras_init_done) - amdgpu_virt_init_ras_err_handler_data(adev); + if (bp_block_size && !adev->virt.ras_init_done) + amdgpu_virt_init_ras_err_handler_data(adev); - if (adev->virt.ras_init_done) - amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); - } + if (adev->virt.ras_init_done) + amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 7cf99f752e01..53ff91fc6cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -500,6 +500,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + r = amdgpu_display_modeset_create_props(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 003aa9e47085..dc379dc22c77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -45,22 +45,43 @@ /** * DOC: GPUVM * - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time. The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages + * GPUVM is the MMU functionality provided on the GPU. + * GPUVM is similar to the legacy GART on older asics, however + * rather than there being a single global GART table + * for the entire GPU, there can be multiple GPUVM page tables active + * at any given time. The GPUVM page tables can contain a mix + * VRAM pages and system pages (both memory and MMIO) and system pages * can be mapped as snooped (cached system pages) or unsnooped * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When executing a command buffer, - * the kernel tells the ring what VMID to use for that command + * + * Each active GPUVM has an ID associated with it and there is a page table + * linked with each VMID. When executing a command buffer, + * the kernel tells the engine what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. * The userspace drivers maintain their own address space and the kernel * sets up their pages tables accordingly when they submit their * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. + * The hardware supports up to 16 active GPUVMs at any given time. + * + * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending + * on the ASIC family. GPUVM supports RWX attributes on each page as well + * as other features such as encryption and caching attributes. + * + * VMID 0 is special. It is the GPUVM used for the kernel driver. In + * addition to an aperture managed by a page table, VMID 0 also has + * several other apertures. There is an aperture for direct access to VRAM + * and there is a legacy AGP aperture which just forwards accesses directly + * to the matching system physical addresses (or IOVAs when an IOMMU is + * present). These apertures provide direct access to these memories without + * incurring the overhead of a page table. VMID 0 is used by the kernel + * driver for tasks like memory management. + * + * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory. + * For user applications, each application can have their own unique GPUVM + * address space. The application manages the address space and the kernel + * driver manages the GPUVM page tables for each process. If an GPU client + * accesses an invalid page, it will generate a GPU page fault, similar to + * accessing an invalid page on a CPU. */ #define START(node) ((node)->start) @@ -463,25 +484,20 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vmid *id; - bool gds_switch_needed; - bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; if (job->vmid == 0) return false; - id = &id_mgr->ids[job->vmid]; - gds_switch_needed = ring->funcs->emit_gds_switch && ( - id->gds_base != job->gds_base || - id->gds_size != job->gds_size || - id->gws_base != job->gws_base || - id->gws_size != job->gws_size || - id->oa_base != job->oa_base || - id->oa_size != job->oa_size); - - if (amdgpu_vmid_had_gpu_reset(adev, id)) + + if (job->vm_needs_flush || ring->has_compute_vm_bug) return true; - return vm_flush_needed || gds_switch_needed; + if (ring->funcs->emit_gds_switch && job->gds_switch_needed) + return true; + + if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid])) + return true; + + return false; } /** @@ -503,27 +519,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; - bool gds_switch_needed = ring->funcs->emit_gds_switch && ( - id->gds_base != job->gds_base || - id->gds_size != job->gds_size || - id->gws_base != job->gws_base || - id->gws_size != job->gws_size || - id->oa_base != job->oa_base || - id->oa_size != job->oa_size); + bool spm_update_needed = job->spm_update_needed; + bool gds_switch_needed = ring->funcs->emit_gds_switch && + job->gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush; struct dma_fence *fence = NULL; bool pasid_mapping_needed = false; unsigned patch_offset = 0; - bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL)); int r; - if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); - if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; pasid_mapping_needed = true; + spm_update_needed = true; } mutex_lock(&id_mgr->lock); @@ -541,6 +550,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; + amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); @@ -555,6 +565,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (pasid_mapping_needed) amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); + if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); + + if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + gds_switch_needed) { + amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, + job->gds_size, job->gws_base, + job->gws_size, job->oa_base, + job->oa_size); + } + if (vm_flush_needed || pasid_mapping_needed) { r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) @@ -579,20 +600,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } dma_fence_put(fence); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && - gds_switch_needed) { - id->gds_base = job->gds_base; - id->gds_size = job->gds_size; - id->gws_base = job->gws_base; - id->gws_size = job->gws_size; - id->oa_base = job->oa_base; - id->oa_size = job->oa_size; - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, - job->gds_size, job->gws_base, - job->gws_size, job->oa_base, - job->oa_size); - } - if (ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); @@ -601,6 +608,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } @@ -2360,7 +2368,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; - long timeout = msecs_to_jiffies(2000); int r; switch (args->in.op) { @@ -2372,21 +2379,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - if (amdgpu_sriov_runtime(adev)) - timeout = 8 * timeout; - - /* Wait vm idle to make sure the vmid set in SPM_VMID is - * not referenced anymore. - */ - r = amdgpu_bo_reserve(fpriv->vm.root.bo, true); - if (r) - return r; - - r = amdgpu_vm_wait_idle(&fpriv->vm, timeout); - if (r < 0) - return r; - - amdgpu_bo_unreserve(fpriv->vm.root.bo); amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6546e786bf00..094bb4807303 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -119,9 +119,6 @@ struct amdgpu_bo_vm; /* Reserve 2MB at top/bottom of address space for kernel use */ #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) -/* max vmids dedicated for process */ -#define AMDGPU_VM_MAX_RESERVED_VMID 1 - /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) @@ -298,8 +295,7 @@ struct amdgpu_vm { struct dma_fence *last_unlocked; unsigned int pasid; - /* dedicated to vm */ - struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; + bool reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 59cf64216fbb..535cd6569bcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -238,8 +238,10 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, /* Wait for PD/PT moves to be completed */ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL); dma_resv_for_each_fence_unlocked(&cursor, fence) { + dma_fence_get(fence); r = drm_sched_job_add_dependency(&p->job->base, fence); if (r) { + dma_fence_put(fence); dma_resv_iter_end(&cursor); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 18c1a173d187..faa12146635c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -435,7 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - if (fpfn || lpfn != man->size) + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 47159e9a0884..4b9e7b050ccd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) if (ret) { dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n"); kobject_put(&hive->kobj); - kfree(hive); hive = NULL; goto pro_end; } @@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); ret = -ENOMEM; kobject_put(&hive->kobj); - kfree(hive); hive = NULL; goto pro_end; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 9d2c6523f546..a56c6e106d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): soc15_program_register_sequence(adev, golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); @@ -855,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1284,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) for (i = 0; i < adev->usec_timeout; i++) { cp_status = RREG32_SOC15(GC, 0, regCP_STAT); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1)) + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -5050,6 +5058,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5083,6 +5092,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, amdgpu_gfx_off_ctrl(adev, enable); break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): gfx_v11_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -5106,6 +5116,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 676832da75eb..f202b45c413c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -47,6 +47,7 @@ #include "amdgpu_ras.h" +#include "amdgpu_ring_mux.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" #include "gfx_v9_4_2.h" @@ -56,6 +57,7 @@ #include "asic_reg/gc/gc_9_0_default.h" #define GFX9_NUM_GFX_RINGS 1 +#define GFX9_NUM_SW_GFX_RINGS 2 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L @@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); if (action == PREEMPT_QUEUES_NO_UNMAP) { - amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); - amdgpu_ring_write(kiq_ring, seq); + amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } else { amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0); @@ -2103,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned int hw_prio; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): @@ -2186,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle) sprintf(ring->name, "gfx_%d", i); ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + + /* disable scheduler on the real ring */ + ring->no_scheduler = true; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2193,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle) return r; } + /* set up the software rings */ + if (adev->gfx.num_gfx_rings) { + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { + ring = &adev->gfx.sw_gfx_ring[i]; + ring->ring_obj = NULL; + sprintf(ring->name, amdgpu_sw_ring_name(i)); + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + ring->is_sw_ring = true; + hw_prio = amdgpu_sw_ring_priority(i); + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, + NULL); + if (r) + return r; + ring->wptr = 0; + } + + /* init the muxer and add software rings */ + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], + GFX9_NUM_SW_GFX_RINGS); + if (r) { + DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); + return r; + } + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, + &adev->gfx.sw_gfx_ring[i]); + if (r) { + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); + return r; + } + } + } + /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; for (i = 0; i < adev->gfx.mec.num_mec; ++i) { @@ -2243,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->gfx.num_gfx_rings) { + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); + amdgpu_ring_mux_fini(&adev->gfx.muxer); + } + for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -5157,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { control |= INDIRECT_BUFFER_PRE_ENB(1); + if (flags & AMDGPU_IB_PREEMPTED) + control |= INDIRECT_BUFFER_PRE_RESUME(1); + if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) - gfx_v9_0_ring_emit_de_meta(ring); + gfx_v9_0_ring_emit_de_meta(ring, + (!amdgpu_sriov_vf(ring->adev) && + flags & AMDGPU_IB_PREEMPTED) ? + true : false); } amdgpu_ring_write(ring, header); @@ -5216,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + uint32_t dw2 = 0; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | - EOP_TC_NC_ACTION_EN) : - (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN)) | - EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + + if (writeback) { + dw2 = EOP_TC_NC_ACTION_EN; + } else { + dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | + EOP_TC_MD_ACTION_EN; + } + dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5); + if (exec) + dw2 |= EOP_EXEC; + + amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); /* @@ -5331,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } -static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) { + struct amdgpu_device *adev = ring->adev; struct v9_ce_ib_state ce_payload = {0}; - uint64_t csa_addr; + uint64_t offset, ce_payload_gpu_addr; + void *ce_payload_cpu_addr; int cnt; cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - csa_addr = amdgpu_csa_vaddr(ring->adev); + + if (ring->is_mes_queue) { + offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = + amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + ce_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + } else { + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + } amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | WRITE_DATA_DST_SEL(8) | WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0)); - amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); - amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); - amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); + amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + + if (resume) + amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, + sizeof(ce_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, + sizeof(ce_payload) >> 2); +} + +static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + ring->trail_seq += 1; + amdgpu_ring_alloc(ring, 13); + gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); + /*reset the CP_VMID_PREEMPT after trailing fence*/ + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), + 0x0); + + /* assert IB preemption, emit the trailing fence */ + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, + ring->trail_fence_gpu_addr, + ring->trail_seq); + + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*ring->trail_fence_cpu_addr)) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); + } + + amdgpu_ring_commit(ring); + + /* deassert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) { + struct amdgpu_device *adev = ring->adev; struct v9_de_ib_state de_payload = {0}; - uint64_t csa_addr, gds_addr; + uint64_t offset, gds_addr, de_payload_gpu_addr; + void *de_payload_cpu_addr; int cnt; - csa_addr = amdgpu_csa_vaddr(ring->adev); - gds_addr = csa_addr + 4096; + if (ring->is_mes_queue) { + offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = + amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + de_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + + offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gds_backup) + + offsetof(struct v9_gfx_meta_data, de_payload); + gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + } else { + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); + } + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -5367,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) WRITE_DATA_DST_SEL(8) | WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0)); - amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); - amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); - amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); + amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + + if (resume) + amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, + sizeof(de_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&de_payload, + sizeof(de_payload) >> 2); } static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, @@ -5385,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; - if (amdgpu_sriov_vf(ring->adev)) - gfx_v9_0_ring_emit_ce_meta(ring); + gfx_v9_0_ring_emit_ce_meta(ring, + (!amdgpu_sriov_vf(ring->adev) && + flags & AMDGPU_IB_PREEMPTED) ? true : false); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -5712,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + if (adev->gfx.num_gfx_rings && + !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + } break; case 1: case 2: @@ -6709,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, + .preempt_ib = gfx_v9_0_ring_preempt_ib, + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, + .emit_wreg = gfx_v9_0_ring_emit_wreg, + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, + .emit_mem_sync = gfx_v9_0_emit_mem_sync, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .secure_submission_supported = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = amdgpu_sw_ring_get_rptr_gfx, + .get_wptr = amdgpu_sw_ring_get_wptr_gfx, + .set_wptr = amdgpu_sw_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + * the first COND_EXEC jump to the place just + * prior to this double SWITCH_BUFFER + */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2 + /* SWITCH_BUFFER */ + 7, /* gfx_v9_0_emit_mem_sync */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_sw_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v9_ring_emit_sb, + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, @@ -6794,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; + if (adev->gfx.num_gfx_rings) { + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 96e52ec0fb69..4326078689cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -759,6 +759,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): adev->num_vmhubs = 2; /* * To fulfill 4-level page support, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 50386eb2eec8..08d6cf79fb15 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1185,6 +1185,8 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t *flags) { + struct amdgpu_bo *bo = mapping->bo_va->base.bo; + *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -1196,7 +1198,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if (mapping->bo_va->base.bo) + if (bo && bo->tbo.resource) gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo, mapping, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8d9c1e841353..970b066b37bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -388,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_reset = 1; mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -1341,7 +1342,8 @@ static int mes_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_in_reset(adev) && + /* it's only intended for use in mes_self_test case, not for s0ix and reset */ + if (!amdgpu_in_reset(adev) && !adev->in_s0ix && (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) amdgpu_mes_self_test(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 998b5d17b271..0e664d0cc8d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -319,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) tmp = mmMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1b027d069ab4..4638ea7c2eec 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev) tmp = mmMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index a1d26c4d80b8..16cc82215e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -275,7 +275,7 @@ static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index e8058edc1d10..6bdf2ef0298d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -269,7 +269,7 @@ static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 770be0a8f7ce..45465acaa943 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -268,7 +268,7 @@ static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index f772bb499f3e..0312c71c3af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -32,7 +32,6 @@ #define RB_ENABLED (1 << 0) #define RB4_ENABLED (1 << 1) -#define MMSCH_DOORBELL_OFFSET 0x8 #define MMSCH_VF_ENGINE_STATUS__PASS 0x1 diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b3fba8dea63c..6853b93ac82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = /* Sienna Cichlid */ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode = /* Beige Goby*/ static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = { /* Yellow Carp*/ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index ee27bfaba6fd..e6a26a7e5e5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): + case IP_VERSION(13, 0, 11): err = psp_init_toc_microcode(psp, chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 1122bd4eae98..4d780e4430e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /** - * sdma_v4_0_gfx_stop - stop the gfx async dma engines + * sdma_v4_0_gfx_enable - enable the gfx async dma engines * * @adev: amdgpu_device pointer - * - * Stop the gfx async dma ring buffers (VEGA10). + * @enable: enable SDMA RB/IB + * control the gfx async dma ring buffers (VEGA10). */ -static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) { u32 rb_cntl, ib_cntl; int i; @@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } } @@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) int i; if (!enable) { - sdma_v4_0_gfx_stop(adev); + sdma_v4_0_gfx_enable(adev, enable); sdma_v4_0_rlc_stop(adev); if (adev->sdma.has_page_queue) sdma_v4_0_page_stop(adev); @@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SMU saves SDMA state for us */ - if (adev->in_s0ix) + if (adev->in_s0ix) { + sdma_v4_0_gfx_enable(adev, false); return 0; + } return sdma_v4_0_hw_fini(adev); } @@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SMU restores SDMA state for us */ - if (adev->in_s0ix) + if (adev->in_s0ix) { + sdma_v4_0_enable(adev, true); + sdma_v4_0_gfx_enable(adev, true); + amdgpu_ttm_set_buffer_funcs_status(adev, true); return 0; + } return sdma_v4_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e3b2b6b4f1a6..7cd17dda32ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; @@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, @@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 799925d22fc8..2357ff39323f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -162,6 +162,7 @@ * 2 - Bypass */ #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) +#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) #define PACKET3_COPY_DATA 0x40 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_COND_WRITE 0x45 @@ -184,6 +185,7 @@ #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ #define EOP_TC_NC_ACTION_EN (1 << 19) #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index b258e9aa0558..5562670b7b52 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -43,6 +43,7 @@ #include "soc15.h" #include "soc15_common.h" #include "soc21.h" +#include "mxgpu_nv.h" static const struct amd_ip_funcs soc21_common_ip_funcs; @@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode = static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -325,6 +326,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -644,24 +646,62 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x20; break; + case IP_VERSION(11, 0, 4): + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0x1; + break; + default: /* FIXME: not supported yet */ return -EINVAL; } - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } return 0; } static int soc21_common_late_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + return 0; } static int soc21_common_sw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + return 0; } @@ -699,6 +739,9 @@ static int soc21_common_hw_fini(void *handle) /* disable the doorbell aperture */ soc21_enable_doorbell_aperture(adev, false); + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_put_irq(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ce8374ee824d..ec87b00f2e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -770,6 +770,33 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) } } +static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx, + bool indirect) +{ + uint32_t tmp; + + if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0)) + return; + + tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL), + tmp, 0, indirect); + + tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN), + tmp, 0, indirect); + + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN), + tmp, 0, indirect); +} + static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; @@ -849,6 +876,8 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); + vcn_v2_6_enable_ras(adev, inst_idx, indirect); + /* unblock VCPU register access */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 403d054cf51b..1e2b22299975 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -103,7 +103,6 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -115,12 +114,6 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - if (amdgpu_sriov_vf(adev)) { - vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; - /* get DWORD offset */ - vcn_doorbell_index = vcn_doorbell_index << 1; - } - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -144,7 +137,7 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; @@ -869,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst) return; } +static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, + bool indirect) +{ + uint32_t tmp; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + return; + + tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), + tmp, 0, indirect); + + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), + tmp, 0, indirect); +} + /** * vcn_v4_0_start_dpg_mode - VCN start with dpg mode * @@ -957,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); + vcn_v4_0_enable_ras(adev, inst_idx, indirect); + /* enable master interrupt */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_MASTINT_EN), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index af01ba061e1b..3251f4783ba1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1521,6 +1521,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): num_of_cache_types = kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c8dd948966c2..b8936340742b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; break; default: @@ -394,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &gfx_v11_kfd2kgd; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): gfx_target_version = 110003; f2g = &gfx_v11_kfd2kgd; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a26257171ab7..51b1683ac5c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void) } static void kfd_process_free_gpuvm(struct kgd_mem *mem, - struct kfd_process_device *pdd, void *kptr) + struct kfd_process_device *pdd, void **kptr) { struct kfd_dev *dev = pdd->dev; - if (kptr) { + if (kptr && *kptr) { amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); - kptr = NULL; + *kptr = NULL; } amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); @@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd) if (!qpd->ib_kaddr || !qpd->ib_base) return; - kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); + kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); } struct kfd_process *kfd_create_process(struct file *filep) @@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd) if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) return; - kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); + kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); } void kfd_process_set_trap_handler(struct qcm_process_device *qpd, @@ -1576,9 +1576,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, p = pdd->process; dev = pdd->dev; - ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( - dev->adev, drm_file, p->pasid, - &p->kgd_process_info, &p->ef); + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file, + &p->kgd_process_info, + &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; @@ -1593,13 +1593,19 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, if (ret) goto err_init_cwsr; + ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid); + if (ret) + goto err_set_pasid; + pdd->drm_file = drm_file; return 0; +err_set_pasid: + kfd_process_device_destroy_cwsr_dgpu(pdd); err_init_cwsr: + kfd_process_device_destroy_ib_mem(pdd); err_reserve_ib_mem: - kfd_process_device_free_bos(pdd); pdd->drm_priv = NULL; return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ef9c6fdfb88d..bceb1a5b2518 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1805,21 +1805,75 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct pr_debug("Added [%d] GPU cache entries\n", num_of_entries); } +static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id, + struct kfd_topology_device **dev) +{ + int proximity_domain = ++topology_crat_proximity_domain; + struct list_head temp_topology_device_list; + void *crat_image = NULL; + size_t image_size = 0; + int res; + + res = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_GPU, gpu, + proximity_domain); + if (res) { + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + topology_crat_proximity_domain--; + goto err; + } + + INIT_LIST_HEAD(&temp_topology_device_list); + + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (res) { + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + topology_crat_proximity_domain--; + goto err; + } + + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + + *dev = kfd_assign_gpu(gpu); + if (WARN_ON(!*dev)) { + res = -ENODEV; + goto err; + } + + /* Fill the cache affinity information here for the GPUs + * using VCRAT + */ + kfd_fill_cache_non_crat_info(*dev, gpu); + + /* Update the SYSFS tree, since we added another topology + * device + */ + res = kfd_topology_update_sysfs(); + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", + gpu_id, res); + +err: + kfd_destroy_crat_image(crat_image); + return res; +} + int kfd_topology_add_device(struct kfd_dev *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; struct kfd_cu_info cu_info; int res = 0; - struct list_head temp_topology_device_list; - void *crat_image = NULL; - size_t image_size = 0; - int proximity_domain; int i; const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; - INIT_LIST_HEAD(&temp_topology_device_list); - gpu_id = kfd_generate_gpu_id(gpu); pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); @@ -1831,54 +1885,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu) */ down_write(&topology_lock); dev = kfd_assign_gpu(gpu); - if (!dev) { - proximity_domain = ++topology_crat_proximity_domain; - - res = kfd_create_crat_image_virtual(&crat_image, &image_size, - COMPUTE_UNIT_GPU, gpu, - proximity_domain); - if (res) { - pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", - gpu_id); - topology_crat_proximity_domain--; - return res; - } - - res = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, - proximity_domain); - if (res) { - pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", - gpu_id); - topology_crat_proximity_domain--; - goto err; - } - - kfd_topology_update_device_list(&temp_topology_device_list, - &topology_device_list); - - dev = kfd_assign_gpu(gpu); - if (WARN_ON(!dev)) { - res = -ENODEV; - goto err; - } - - /* Fill the cache affinity information here for the GPUs - * using VCRAT - */ - kfd_fill_cache_non_crat_info(dev, gpu); - - /* Update the SYSFS tree, since we added another topology - * device - */ - res = kfd_topology_update_sysfs(); - if (!res) - sys_props.generation_count++; - else - pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", - gpu_id, res); - } + if (!dev) + res = kfd_topology_add_device_locked(gpu, gpu_id, &dev); up_write(&topology_lock); + if (res) + return res; dev->gpu_id = gpu_id; gpu->id = gpu_id; @@ -2001,11 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) kfd_debug_print_topology(); - if (!res) - kfd_notify_gpu_change(gpu_id, 1); -err: - kfd_destroy_crat_image(crat_image); - return res; + kfd_notify_gpu_change(gpu_id, 1); + + return 0; } /** diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 1cdb379a90d7..2efe93f74f84 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128 || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG)) @@ -13,6 +14,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a8772082883e..86bc23a67d97 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -146,14 +146,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); /* Number of bytes in PSP footer for firmware. */ #define PSP_FOOTER_BYTES 0x100 -/* - * DMUB Async to Sync Mechanism Status - */ -#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 -#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 -#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 -#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4 - /** * DOC: overview * @@ -1104,7 +1096,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) /* Initialize hardware. */ memset(&hw_params, 0, sizeof(hw_params)); hw_params.fb_base = adev->gmc.fb_start; - hw_params.fb_offset = adev->gmc.aper_base; + hw_params.fb_offset = adev->vm_manager.vram_base_offset; /* backdoor load firmware and trigger dmub running */ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) @@ -1226,7 +1218,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24; pa_config->system_aperture.fb_base = adev->gmc.fb_start; - pa_config->system_aperture.fb_offset = adev->gmc.aper_base; + pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset; pa_config->system_aperture.fb_top = adev->gmc.fb_end; pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12; @@ -1371,7 +1363,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), + }, + }, {} + /* TODO: refactor this from a fixed table to a dynamic option */ }; static void retrieve_dmi_info(struct amdgpu_display_manager *dm) @@ -1404,6 +1433,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) memset(&init_params, 0, sizeof(init_params)); #endif + mutex_init(&adev->dm.dpia_aux_lock); mutex_init(&adev->dm.dc_lock); mutex_init(&adev->dm.audio_lock); @@ -1473,6 +1503,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; @@ -1768,6 +1799,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) mutex_destroy(&adev->dm.audio_lock); mutex_destroy(&adev->dm.dc_lock); + mutex_destroy(&adev->dm.dpia_aux_lock); return; } @@ -4329,6 +4361,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) amdgpu_set_panel_orientation(&aconnector->base); } + /* If we didn't find a panel, notify the acpi video detection */ + if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0) + acpi_video_report_nolcd(); + /* Software is initialized. Now we can register interrupt handlers. */ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) @@ -4838,6 +4874,35 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, return 0; } +static inline void fill_dc_dirty_rect(struct drm_plane *plane, + struct rect *dirty_rect, int32_t x, + int32_t y, int32_t width, int32_t height, + int *i, bool ffu) +{ + if (*i > DC_MAX_DIRTY_RECTS) + return; + + if (*i == DC_MAX_DIRTY_RECTS) + goto out; + + dirty_rect->x = x; + dirty_rect->y = y; + dirty_rect->width = width; + dirty_rect->height = height; + + if (ffu) + drm_dbg(plane->dev, + "[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n", + plane->base.id, width, height); + else + drm_dbg(plane->dev, + "[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)", + plane->base.id, x, y, width, height); + +out: + (*i)++; +} + /** * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates * @@ -4858,10 +4923,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - * implicitly provide damage clips without any client support via the plane * bounds. - * - * Today, amdgpu_dm only supports the MPO and cursor usecase. - * - * TODO: Also enable for FB_DAMAGE_CLIPS */ static void fill_dc_dirty_rects(struct drm_plane *plane, struct drm_plane_state *old_plane_state, @@ -4872,12 +4933,11 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); struct rect *dirty_rects = flip_addrs->dirty_rects; uint32_t num_clips; + struct drm_mode_rect *clips; bool bb_changed; bool fb_changed; uint32_t i = 0; - flip_addrs->dirty_rect_count = 0; - /* * Cursor plane has it's own dirty rect update interface. See * dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data @@ -4885,20 +4945,20 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; - /* - * Today, we only consider MPO use-case for PSR SU. If MPO not - * requested, and there is a plane update, do FFU. - */ + num_clips = drm_plane_get_damage_clips_count(new_plane_state); + clips = drm_plane_get_damage_clips(new_plane_state); + if (!dm_crtc_state->mpo_requested) { - dirty_rects[0].x = 0; - dirty_rects[0].y = 0; - dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay; - dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay; - flip_addrs->dirty_rect_count = 1; - DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n", - new_plane_state->plane->base.id, - dm_crtc_state->base.mode.crtc_hdisplay, - dm_crtc_state->base.mode.crtc_vdisplay); + if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS) + goto ffu; + + for (; flip_addrs->dirty_rect_count < num_clips; clips++) + fill_dc_dirty_rect(new_plane_state->plane, + &dirty_rects[i], clips->x1, + clips->y1, clips->x2 - clips->x1, + clips->y2 - clips->y1, + &flip_addrs->dirty_rect_count, + false); return; } @@ -4909,7 +4969,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, * If plane is moved or resized, also add old bounding box to dirty * rects. */ - num_clips = drm_plane_get_damage_clips_count(new_plane_state); fb_changed = old_plane_state->fb->base.id != new_plane_state->fb->base.id; bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x || @@ -4917,36 +4976,51 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, old_plane_state->crtc_w != new_plane_state->crtc_w || old_plane_state->crtc_h != new_plane_state->crtc_h); - DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n", - new_plane_state->plane->base.id, - bb_changed, fb_changed, num_clips); - - if (num_clips || fb_changed || bb_changed) { - dirty_rects[i].x = new_plane_state->crtc_x; - dirty_rects[i].y = new_plane_state->crtc_y; - dirty_rects[i].width = new_plane_state->crtc_w; - dirty_rects[i].height = new_plane_state->crtc_h; - DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n", - new_plane_state->plane->base.id, - dirty_rects[i].x, dirty_rects[i].y, - dirty_rects[i].width, dirty_rects[i].height); - i += 1; - } + drm_dbg(plane->dev, + "[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n", + new_plane_state->plane->base.id, + bb_changed, fb_changed, num_clips); - /* Add old plane bounding-box if plane is moved or resized */ if (bb_changed) { - dirty_rects[i].x = old_plane_state->crtc_x; - dirty_rects[i].y = old_plane_state->crtc_y; - dirty_rects[i].width = old_plane_state->crtc_w; - dirty_rects[i].height = old_plane_state->crtc_h; - DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n", - old_plane_state->plane->base.id, - dirty_rects[i].x, dirty_rects[i].y, - dirty_rects[i].width, dirty_rects[i].height); - i += 1; - } + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], + new_plane_state->crtc_x, + new_plane_state->crtc_y, + new_plane_state->crtc_w, + new_plane_state->crtc_h, &i, false); + + /* Add old plane bounding-box if plane is moved or resized */ + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], + old_plane_state->crtc_x, + old_plane_state->crtc_y, + old_plane_state->crtc_w, + old_plane_state->crtc_h, &i, false); + } + + if (num_clips) { + for (; i < num_clips; clips++) + fill_dc_dirty_rect(new_plane_state->plane, + &dirty_rects[i], clips->x1, + clips->y1, clips->x2 - clips->x1, + clips->y2 - clips->y1, &i, false); + } else if (fb_changed && !bb_changed) { + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], + new_plane_state->crtc_x, + new_plane_state->crtc_y, + new_plane_state->crtc_w, + new_plane_state->crtc_h, &i, false); + } + + if (i > DC_MAX_DIRTY_RECTS) + goto ffu; flip_addrs->dirty_rect_count = i; + return; + +ffu: + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0, + dm_crtc_state->base.mode.crtc_hdisplay, + dm_crtc_state->base.mode.crtc_vdisplay, + &flip_addrs->dirty_rect_count, true); } static void update_stream_scaling_settings(const struct drm_display_mode *mode, @@ -10167,91 +10241,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address, return value; } -static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, - struct dc_context *ctx, - uint8_t status_type, - uint32_t *operation_result) +int amdgpu_dm_process_dmub_aux_transfer_sync( + struct dc_context *ctx, + unsigned int link_index, + struct aux_payload *payload, + enum aux_return_code_type *operation_result) { struct amdgpu_device *adev = ctx->driver_context; - int return_status = -1; struct dmub_notification *p_notify = adev->dm.dmub_notify; + int ret = -1; - if (is_cmd_aux) { - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { - return_status = p_notify->aux_reply.length; - *operation_result = p_notify->result; - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) { - *operation_result = AUX_RET_ERROR_TIMEOUT; - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) { - *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) { - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - } else { - *operation_result = AUX_RET_ERROR_UNKNOWN; + mutex_lock(&adev->dm.dpia_aux_lock); + if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) { + *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; + goto out; + } + + if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { + DRM_ERROR("wait_for_completion_timeout timeout!"); + *operation_result = AUX_RET_ERROR_TIMEOUT; + goto out; + } + + if (p_notify->result != AUX_RET_SUCCESS) { + /* + * Transient states before tunneling is enabled could + * lead to this error. We can ignore this for now. + */ + if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", + payload->address, payload->length, + p_notify->result); } - } else { - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { - return_status = 0; - *operation_result = p_notify->sc_status; - } else { - *operation_result = SET_CONFIG_UNKNOWN_ERROR; + *operation_result = AUX_RET_ERROR_INVALID_REPLY; + goto out; + } + + + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; + if (!payload->write && p_notify->aux_reply.length && + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { + + if (payload->length != p_notify->aux_reply.length) { + DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", + p_notify->aux_reply.length, + payload->address, payload->length); + *operation_result = AUX_RET_ERROR_INVALID_REPLY; + goto out; } + + memcpy(payload->data, p_notify->aux_reply.data, + p_notify->aux_reply.length); } - return return_status; + /* success */ + ret = p_notify->aux_reply.length; + *operation_result = p_notify->result; +out: + mutex_unlock(&adev->dm.dpia_aux_lock); + return ret; } -int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx, - unsigned int link_index, void *cmd_payload, void *operation_result) +int amdgpu_dm_process_dmub_set_config_sync( + struct dc_context *ctx, + unsigned int link_index, + struct set_config_cmd_payload *payload, + enum set_config_status *operation_result) { struct amdgpu_device *adev = ctx->driver_context; - int ret = 0; + bool is_cmd_complete; + int ret; - if (is_cmd_aux) { - dc_process_dmub_aux_transfer_async(ctx->dc, - link_index, (struct aux_payload *)cmd_payload); - } else if (dc_process_dmub_set_config_async(ctx->dc, link_index, - (struct set_config_cmd_payload *)cmd_payload, - adev->dm.dmub_notify)) { - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, - (uint32_t *)operation_result); - } + mutex_lock(&adev->dm.dpia_aux_lock); + is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc, + link_index, payload, adev->dm.dmub_notify); - ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ); - if (ret == 0) { + if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { + ret = 0; + *operation_result = adev->dm.dmub_notify->sc_status; + } else { DRM_ERROR("wait_for_completion_timeout timeout!"); - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT, - (uint32_t *)operation_result); - } - - if (is_cmd_aux) { - if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) { - struct aux_payload *payload = (struct aux_payload *)cmd_payload; - - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; - if (!payload->write && adev->dm.dmub_notify->aux_reply.length && - payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) { - - if (payload->length != adev->dm.dmub_notify->aux_reply.length) { - DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n", - payload->address, payload->length, - adev->dm.dmub_notify->aux_reply.length); - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx, - DMUB_ASYNC_TO_SYNC_ACCESS_INVALID, - (uint32_t *)operation_result); - } - - memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data, - adev->dm.dmub_notify->aux_reply.length); - } - } + ret = -1; + *operation_result = SET_CONFIG_UNKNOWN_ERROR; } - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, - (uint32_t *)operation_result); + mutex_unlock(&adev->dm.dpia_aux_lock); + return ret; } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 83436ef3b26b..df3c25e32c65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -59,7 +59,9 @@ #include "signal_types.h" #include "amdgpu_dm_crc.h" struct aux_payload; +struct set_config_cmd_payload; enum aux_return_code_type; +enum set_config_status; /* Forward declarations */ struct amdgpu_device; @@ -542,6 +544,13 @@ struct amdgpu_display_manager { * occurred on certain intel platform */ bool aux_hpd_discon_quirk; + + /** + * @dpia_aux_lock: + * + * Guards access to DPIA AUX + */ + struct mutex dpia_aux_lock; }; enum dsc_clock_force_state { @@ -785,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect( extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; -int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, - struct dc_context *ctx, unsigned int link_index, - void *payload, void *operation_result); +int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, + struct aux_payload *payload, enum aux_return_code_type *operation_result); + +int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, + struct set_config_cmd_payload *payload, enum set_config_status *operation_result); bool check_seamless_boot_capability(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 2c43cdd2e707..461037a3dd75 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2639,6 +2639,25 @@ static int dp_mst_progress_status_show(struct seq_file *m, void *unused) return 0; } +/* + * Reports whether the connected display is a USB4 DPIA tunneled display + * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link + */ +static int is_dpia_link_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct dc_link *link = aconnector->dc_link; + + if (connector->status != connector_status_connected) + return -ENODEV; + + seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" : + (link->ep_type == DISPLAY_ENDPOINT_PHY) ? "no" : "unknown"); + + return 0; +} + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2650,6 +2669,7 @@ DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); +DEFINE_SHOW_ATTRIBUTE(is_dpia_link); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2794,7 +2814,8 @@ static const struct { {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, {"is_mst_connector", &dp_is_mst_connector_fops}, - {"mst_progress_status", &dp_mst_progress_status_fops} + {"mst_progress_status", &dp_mst_progress_status_fops}, + {"is_dpia_link", &is_dpia_link_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index e47098fa5aac..6994c9a1ed85 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -817,9 +817,8 @@ int dm_helper_dmub_aux_transfer_sync( struct aux_payload *payload, enum aux_return_code_type *operation_result) { - return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx, - link->link_index, (void *)payload, - (void *)operation_result); + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, + operation_result); } int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, @@ -827,9 +826,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, struct set_config_cmd_payload *payload, enum set_config_status *operation_result) { - return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx, - link->link_index, (void *)payload, - (void *)operation_result); + return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload, + operation_result); } void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 24d859ad4712..1edf7385f8d8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1187,7 +1187,7 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; - int ret; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index e6854f7270a6..3c50b3ff7954 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1600,6 +1600,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, supported_rotations); + if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) && + plane->type != DRM_PLANE_TYPE_CURSOR) + drm_plane_enable_fb_damage_clips(plane); + drm_plane_helper_add(plane, &dm_plane_helper_funcs); #ifdef CONFIG_DRM_AMD_DC_HDR diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h index 8ea8ee57b39f..61bb1d86182e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT // This is a stripped-down version of the smu11_driver_if.h file for the relevant DAL interfaces. #define SMU11_DRIVER_IF_VERSION 0x40 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index 090b2c02aee1..0827c7df2855 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -333,8 +333,8 @@ void dcn31_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY)) support = DCN_ZSTATE_SUPPORT_DISALLOW; - - if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY) + if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY || + support == DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY) param = 1; else param = 0; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c index 2db595672a46..f47cfe6b42bd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c @@ -346,8 +346,6 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs if (!clk_mgr->smu_present) return; - // Arg[15:0] = 8/9/0 for Z8/Z9/disallow -> existing bits - // Arg[16] = Disallow Z9 -> new bit switch (support) { case DCN_ZSTATE_SUPPORT_ALLOW: @@ -366,6 +364,16 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs param = (1 << 10); break; + case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY: + msg_id = VBIOSSMC_MSG_AllowZstatesEntry; + param = (1 << 10) | (1 << 8); + break; + + case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY: + msg_id = VBIOSSMC_MSG_AllowZstatesEntry; + param = (1 << 8); + break; + default: //DCN_ZSTATE_SUPPORT_UNKNOWN msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = 0; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 6f77d8e538ab..200fcec19186 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -233,41 +233,6 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) DC_FP_END(); } -static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr, - struct dc_state *context, - int ref_dtbclk_khz) -{ - struct dccg *dccg = clk_mgr->dccg; - uint32_t tg_mask = 0; - int i; - - for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - struct dtbclk_dto_params dto_params = {0}; - - /* use mask to program DTO once per tg */ - if (pipe_ctx->stream_res.tg && - !(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) { - tg_mask |= (1 << pipe_ctx->stream_res.tg->inst); - - dto_params.otg_inst = pipe_ctx->stream_res.tg->inst; - dto_params.ref_dtbclk_khz = ref_dtbclk_khz; - - if (is_dp_128b_132b_signal(pipe_ctx)) { - dto_params.pixclk_khz = pipe_ctx->stream->phy_pix_clk; - - if (pipe_ctx->stream_res.audio != NULL) - dto_params.req_audio_dtbclk_khz = 24000; - } - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - dto_params.is_hdmi = true; - - dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params); - //dccg->funcs->set_audio_dtbclk_dto(clk_mgr->dccg, &dto_params); - } - } -} - /* Since DPPCLK request to PMFW needs to be exact (due to DPP DTO programming), * update DPPCLK to be the exact frequency that will be set after the DPPCLK * divider is updated. This will prevent rounding issues that could cause DPP @@ -438,7 +403,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, } if (!new_clocks->dtbclk_en) { - new_clocks->ref_dtbclk_khz = 0; + new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; } /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ @@ -447,8 +412,6 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, /* DCCG requires KHz precision for DTBCLK */ clk_mgr_base->clks.ref_dtbclk_khz = dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); - - dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); } if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h index d30fbbdd1792..d3d5a8caccf8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT // This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces. #define SMU13_DRIVER_IF_VERSION 0x18 diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 1c3de3a1671e..0cb8d1f934d1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1192,7 +1192,7 @@ static void disable_vbios_mode_if_required( if (pix_clk_100hz != requested_pix_clk_100hz) { core_link_disable_stream(pipe); - pipe->stream->dpms_off = true; + pipe->stream->dpms_off = false; } } } @@ -1556,6 +1556,9 @@ bool dc_validate_boot_timing(const struct dc *dc, if (tg_inst >= dc->res_pool->timing_generator_count) return false; + if (tg_inst != link->link_enc->preferred_engine) + return false; + tg = dc->res_pool->timing_generators[tg_inst]; if (!tg->funcs->get_hw_timing) @@ -1985,7 +1988,7 @@ context_alloc_fail: DC_LOG_DC("%s Finished.\n", __func__); - return (res == DC_OK); + return res; } /* TODO: When the transition to the new commit sequence is done, remove this @@ -3061,7 +3064,7 @@ static bool update_planes_and_stream_state(struct dc *dc, * Ensures that we have enough pipes for newly added MPO planes */ if (dc->res_pool->funcs->remove_phantom_pipes) - dc->res_pool->funcs->remove_phantom_pipes(dc, context); + dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); /*remove old surfaces from context */ if (!dc_rem_all_planes_for_stream(dc, stream, context)) { @@ -3098,6 +3101,19 @@ static bool update_planes_and_stream_state(struct dc *dc, if (update_type == UPDATE_TYPE_FULL) { if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + /* For phantom pipes we remove and create a new set of phantom pipes + * for each full update (because we don't know if we'll need phantom + * pipes until after the first round of validation). However, if validation + * fails we need to keep the existing phantom pipes (because we don't update + * the dc->current_state). + * + * The phantom stream/plane refcount is decremented for validation because + * we assume it'll be removed (the free comes when the dc_state is freed), + * but if validation fails we have to increment back the refcount so it's + * consistent. + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state); BREAK_TO_DEBUGGER(); goto fail; } @@ -3740,6 +3756,8 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, struct dc_stream_status *cur_stream_status = stream_get_status(dc->current_state, stream); bool force_minimal_pipe_splitting = false; + bool subvp_active = false; + uint32_t i; *is_plane_addition = false; @@ -3771,11 +3789,25 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, } } + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + subvp_active = true; + break; + } + } + /* For SubVP when adding or removing planes we need to add a minimal transition * (even when disabling all planes). Whenever disabling a phantom pipe, we * must use the minimal transition path to disable the pipe correctly. + * + * We want to use the minimal transition whenever subvp is active, not only if + * a plane is being added / removed from a subvp stream (MPO plane can be added + * to a DRR pipe of SubVP + DRR config, in which case we still want to run through + * a min transition to disable subvp. */ - if (cur_stream_status && stream->mall_stream_config.type == SUBVP_MAIN) { + if (cur_stream_status && subvp_active) { /* determine if minimal transition is required due to SubVP*/ if (cur_stream_status->plane_count > surface_count) { force_minimal_pipe_splitting = true; @@ -3925,6 +3957,7 @@ bool dc_update_planes_and_stream(struct dc *dc, struct dc_state *context; enum surface_update_type update_type; int i; + struct mall_temp_config mall_temp_config; /* In cases where MPO and split or ODM are used transitions can * cause underflow. Apply stream configuration with minimal pipe @@ -3956,11 +3989,29 @@ bool dc_update_planes_and_stream(struct dc *dc, /* on plane removal, minimal state is the new one */ if (force_minimal_pipe_splitting && !is_plane_addition) { + /* Since all phantom pipes are removed in full validation, + * we have to save and restore the subvp/mall config when + * we do a minimal transition since the flags marking the + * pipe as subvp/phantom will be cleared (dc copy constructor + * creates a shallow copy). + */ + if (dc->res_pool->funcs->save_mall_state) + dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); if (!commit_minimal_transition_state(dc, context)) { dc_release_state(context); return false; } - + if (dc->res_pool->funcs->restore_mall_state) + dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); + + /* If we do a minimal transition with plane removal and the context + * has subvp we also have to retain back the phantom stream / planes + * since the refcount is decremented as part of the min transition + * (we commit a state with no subvp, so the phantom streams / planes + * had to be removed). + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, context); update_type = UPDATE_TYPE_FULL; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 5304e9daf90a..342e906ae26e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3378,7 +3378,7 @@ bool dc_link_setup_psr(struct dc_link *link, case FAMILY_YELLOW_CARP: case AMDGPU_FAMILY_GC_10_3_6: case AMDGPU_FAMILY_GC_11_0_1: - if (dc->debug.disable_z10) + if (dc->debug.disable_z10 || dc->debug.psr_skip_crtc_disable) psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true; break; default: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 38d71b5c1f2d..20e534f73513 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -332,9 +332,21 @@ bool dc_stream_set_cursor_attributes( dc = stream->ctx->dc; - if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) - if (stream->mall_stream_config.type == SUBVP_MAIN) + /* SubVP is not compatible with HW cursor larger than 64 x 64 x 4. + * Therefore, if cursor is greater than 64 x 64 x 4, fallback to SW cursor in the following case: + * 1. For single display cases, if resolution is >= 5K and refresh rate < 120hz + * 2. For multi display cases, if resolution is >= 4K and refresh rate < 120hz + * + * [< 120hz is a requirement for SubVP configs] + */ + if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) { + if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 && + ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) return false; + else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 && + ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) + return false; + } stream->cursor_attributes = *attributes; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 57fc9193c770..85ebeaa2de18 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.212" +#define DC_VER "3.2.215" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -267,6 +267,7 @@ struct dc_caps { uint16_t subvp_pstate_allow_width_us; uint16_t subvp_vertical_int_margin_us; bool seamless_odm; + uint8_t subvp_drr_vblank_start_margin_us; }; struct dc_bug_wa { @@ -492,6 +493,8 @@ enum dcn_pwr_state { enum dcn_zstate_support_state { DCN_ZSTATE_SUPPORT_UNKNOWN, DCN_ZSTATE_SUPPORT_ALLOW, + DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY, + DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY, DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY, DCN_ZSTATE_SUPPORT_DISALLOW, }; @@ -841,6 +844,7 @@ struct dc_debug_options { int crb_alloc_policy_min_disp_count; bool disable_z10; bool enable_z9_disable_interface; + bool psr_skip_crtc_disable; union dpia_debug_options dpia_debug; bool disable_fixed_vs_aux_timeout_wa; bool force_disable_subvp; @@ -849,6 +853,7 @@ struct dc_debug_options { unsigned int force_subvp_num_ways; unsigned int force_mall_ss_num_ways; bool alloc_extra_way_for_cursor; + uint32_t subvp_extra_lines; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 097556f7b32c..6ccf477d1c4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -493,6 +493,7 @@ static void populate_subvp_cmd_drr_info(struct dc *dc, pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported; pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported; + pipe_data->pipe_config.vblank_data.drr_info.drr_vblank_start_margin = dc->caps.subvp_drr_vblank_start_margin_us; } /** diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index dc6afe33bca2..2e18bcf6b11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -151,6 +151,20 @@ struct dc_panel_config { bool optimize_edp_link_rate; /* eDP ILR */ } ilr; }; + +/* + * USB4 DPIA BW ALLOCATION STRUCTS + */ +struct dc_dpia_bw_alloc { + int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already + int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated + int padding_bw; // The Padding "Un-used" BW allocated by CM for padding reasons + int sink_max_bw; // The Max BW that sink can require/support + int estimated_bw; // The estimated available BW for this DPIA + int bw_granularity; // BW Granularity + bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM +}; + /* * A link contains one or more sinks and their connected status. * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index f4dfd3a49b68..dfd3df1d2f7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -144,7 +144,7 @@ struct test_pattern { unsigned int cust_pattern_size; }; -#define SUBVP_DRR_MARGIN_US 500 // 500us for DRR margin (SubVP + DRR) +#define SUBVP_DRR_MARGIN_US 600 // 600us for DRR margin (SubVP + DRR) enum mall_stream_type { SUBVP_NONE, // subvp not in use @@ -160,6 +160,17 @@ struct mall_stream_config { struct dc_stream_state *paired_stream; // master / slave stream }; +/* Temp struct used to save and restore MALL config + * during validation. + * + * TODO: Move MALL config into dc_state instead of stream struct + * to avoid needing to save/restore. + */ +struct mall_temp_config { + struct mall_stream_config mall_stream_config[MAX_PIPES]; + bool is_phantom_plane[MAX_PIPES]; +}; + struct dc_stream_state { // sink is deprecated, new code should not reference // this pointer diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index c5380ce70653..913a1fe6b3da 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1561,8 +1561,13 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != NULL; - pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false; - + /* Phantom and main stream share the same link (because the stream + * is constructed with the same sink). Make sure not to override + * and link programming on the main. + */ + if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false; + } return DC_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 1b70b78e2fa1..af631085e88c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = { audio_regs(2), audio_regs(3), audio_regs(4), - audio_regs(5) + audio_regs(5), + audio_regs(6), }; #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index b9765b3899e1..ef52e6b6eccf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -436,34 +436,48 @@ void dpp1_set_cursor_position( uint32_t height) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; + int x_hotspot = pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)height; + int cursor_width = (int)width; uint32_t cur_en = pos->enable ? 1 : 0; - // Cursor width/height and hotspots need to be rotated for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { - swap(width, height); + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } if (src_x_offset >= (int)param->viewport.width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width <= 0) + if (src_x_offset + cursor_width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset + (int)height <= 0) + if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 0f746bb4e500..d51f1ce02874 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -55,7 +55,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); s = &wm->sets[1]; s->wm_set = 1; @@ -65,7 +65,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); s = &wm->sets[2]; s->wm_set = 2; @@ -75,7 +75,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); s = &wm->sets[3]; s->wm_set = 3; @@ -85,7 +85,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); } void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 52e201e9b091..a142a00bc432 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 587b04b8712d..fe2023f18b7d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -159,7 +159,7 @@ static void dcn10_log_hubbub_state(struct dc *dc, DTN_INFO_MICRO_SEC(s->pte_meta_urgent); DTN_INFO_MICRO_SEC(s->sr_enter); DTN_INFO_MICRO_SEC(s->sr_exit); - DTN_INFO_MICRO_SEC(s->dram_clk_chanage); + DTN_INFO_MICRO_SEC(s->dram_clk_change); DTN_INFO("\n"); } @@ -869,6 +869,32 @@ static void false_optc_underflow_wa( tg->funcs->clear_optc_underflow(tg); } +static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) +{ + struct pipe_ctx *other_pipe; + int vready_offset = pipe->pipe_dlg_param.vready_offset; + + /* Always use the largest vready_offset of all connected pipes */ + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + + return vready_offset; +} + enum dc_status dcn10_enable_stream_timing( struct pipe_ctx *pipe_ctx, struct dc_state *context, @@ -912,7 +938,7 @@ enum dc_status dcn10_enable_stream_timing( pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, &stream->timing, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, @@ -2190,6 +2216,12 @@ void dcn10_enable_vblanks_synchronization( opp = grouped_pipes[i]->stream_res.opp; tg = grouped_pipes[i]->stream_res.tg; tg->funcs->get_otg_active_size(tg, &width, &height); + + if (!tg->funcs->is_tg_enabled(tg)) { + DC_SYNC_INFO("Skipping timing sync on disabled OTG\n"); + return; + } + if (opp->funcs->opp_program_dpg_dimensions) opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1); } @@ -2252,6 +2284,12 @@ void dcn10_enable_timing_synchronization( opp = grouped_pipes[i]->stream_res.opp; tg = grouped_pipes[i]->stream_res.tg; tg->funcs->get_otg_active_size(tg, &width, &height); + + if (!tg->funcs->is_tg_enabled(tg)) { + DC_SYNC_INFO("Skipping timing sync on disabled OTG\n"); + return; + } + if (opp->funcs->opp_program_dpg_dimensions) opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1); } @@ -2902,7 +2940,7 @@ void dcn10_program_pipe( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index e8b6065fffad..a0f8e31d2adc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -83,7 +83,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i memset(&wm, 0, sizeof(struct dcn_hubbub_wm)); dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm); - chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n"); + chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_change\n"); remaining_buffer -= chars_printed; pBuf += chars_printed; @@ -98,7 +98,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i (s->pte_meta_urgent * frac) / ref_clk_mhz / frac, (s->pte_meta_urgent * frac) / ref_clk_mhz % frac, (s->sr_enter * frac) / ref_clk_mhz / frac, (s->sr_enter * frac) / ref_clk_mhz % frac, (s->sr_exit * frac) / ref_clk_mhz / frac, (s->sr_exit * frac) / ref_clk_mhz % frac, - (s->dram_clk_chanage * frac) / ref_clk_mhz / frac, (s->dram_clk_chanage * frac) / ref_clk_mhz % frac); + (s->dram_clk_change * frac) / ref_clk_mhz / frac, (s->dram_clk_change * frac) / ref_clk_mhz % frac); remaining_buffer -= chars_printed; pBuf += chars_printed; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index aacb1fb5c73e..24bd93219936 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -500,7 +500,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); s = &wm->sets[1]; s->wm_set = 1; @@ -511,7 +511,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); s = &wm->sets[2]; s->wm_set = 2; @@ -522,7 +522,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); s = &wm->sets[3]; s->wm_set = 3; @@ -533,7 +533,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub, s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D); s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D); } - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); } void hubbub2_get_dchub_ref_freq(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 938dba5249d4..4566bc7abf17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -973,10 +973,12 @@ void hubp2_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -994,18 +996,26 @@ void hubp2_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1042,8 +1052,8 @@ void hubp2_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); @@ -1052,8 +1062,8 @@ void hubp2_cursor_set_position( hubp->pos.cur_ctl.bits.cur_enable = cur_en; hubp->pos.position.bits.x_pos = pos->x; hubp->pos.position.bits.y_pos = pos->y; - hubp->pos.hot_spot.bits.x_hot = x_hotspot; - hubp->pos.hot_spot.bits.y_hot = y_hotspot; + hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot; + hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot; hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset; /* Cursor Rectangle Cache * Cursor bitmaps have different hotspot values diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 3f3d4daa6294..6291a241158a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1652,6 +1652,31 @@ static void dcn20_update_dchubp_dpp( hubp->funcs->phantom_hubp_post_enable(hubp); } +static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) +{ + struct pipe_ctx *other_pipe; + int vready_offset = pipe->pipe_dlg_param.vready_offset; + + /* Always use the largest vready_offset of all connected pipes */ + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + + return vready_offset; +} static void dcn20_program_pipe( struct dc *dc, @@ -1670,7 +1695,7 @@ static void dcn20_program_pipe( && !pipe_ctx->prev_odm_pipe) { pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); @@ -1716,7 +1741,10 @@ static void dcn20_program_pipe( * only do gamma programming for powering on, internal memcmp to avoid * updating on slave planes */ - if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf) + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf || + pipe_ctx->plane_state->update_flags.bits.output_tf_change) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1983,10 +2011,13 @@ void dcn20_prepare_bandwidth( /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_size) { - if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) + if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - else + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); } @@ -2067,7 +2098,7 @@ bool dcn20_update_bandwidth( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index c5e200d09038..aeb0e0d9b70a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -635,7 +635,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit); REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_chanage); + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_change); s = &wm->sets[1]; s->wm_set = 1; @@ -649,7 +649,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit); REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_chanage); + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_change); s = &wm->sets[2]; s->wm_set = 2; @@ -663,7 +663,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit); REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_chanage); + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_change); s = &wm->sets[3]; s->wm_set = 3; @@ -677,7 +677,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit); REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage); + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_change); } static void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 480145f09246..8cf10351f271 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1493,6 +1493,8 @@ static bool dcn301_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.dp_hdmi21_pcon_support = true; + /* read VBIOS LTTPR caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 7d11c2a43cbe..47cffd0e6830 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1281,6 +1281,8 @@ static bool dcn302_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.dp_hdmi21_pcon_support = true; + /* read VBIOS LTTPR caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 92393b04cc44..c14d35894b2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1212,6 +1212,8 @@ static bool dcn303_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.dp_hdmi21_pcon_support = true; + /* read VBIOS LTTPR caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 165c920ca776..4226a051df41 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -623,3 +623,43 @@ void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable) if (hws->ctx->dc->debug.hpo_optimization) REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable); } +void dcn31_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust) +{ + int i = 0; + struct drr_params params = {0}; + unsigned int event_triggers = 0x2;/*Bit[1]: OTG_TRIG_A*/ + unsigned int num_frames = 2; + params.vertical_total_max = adjust.v_total_max; + params.vertical_total_min = adjust.v_total_min; + params.vertical_total_mid = adjust.v_total_mid; + params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; + for (i = 0; i < num_pipes; i++) { + if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) + pipe_ctx[i]->stream_res.tg->funcs->set_drr( + pipe_ctx[i]->stream_res.tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx[i]->stream_res.tg, + event_triggers, num_frames); + } + } +} +void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx, + int num_pipes, const struct dc_static_screen_params *params) +{ + unsigned int i; + unsigned int triggers = 0; + if (params->triggers.surface_update) + triggers |= 0x600;/*bit 9 and bit10 : 110 0000 0000*/ + if (params->triggers.cursor_update) + triggers |= 0x10;/*bit4*/ + if (params->triggers.force_trigger) + triggers |= 0x1; + for (i = 0; i < num_pipes; i++) + pipe_ctx[i]->stream_res.tg->funcs-> + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h index edfc01d6ad73..e7e03a8722e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h @@ -56,4 +56,8 @@ bool dcn31_is_abm_supported(struct dc *dc, void dcn31_init_pipes(struct dc *dc, struct dc_state *context); void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable); +void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx, + int num_pipes, const struct dc_static_screen_params *params); +void dcn31_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust); #endif /* __DC_HWSS_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 3a32810bbe38..7c2da70ffe21 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -64,9 +64,9 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .prepare_bandwidth = dcn20_prepare_bandwidth, .optimize_bandwidth = dcn20_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn31_set_drr, .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, + .set_static_screen_control = dcn31_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, .set_avmute = dcn30_set_avmute, .log_hw_state = dcn10_log_hw_state, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index 63a677c8ee27..fe449f7aa771 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -40,6 +40,7 @@ #define FN(reg_name, field_name) \ optc1->tg_shift->field_name, optc1->tg_mask->field_name +#define STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN 0x2000 /*bit 13*/ static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, struct dc_crtc_timing *timing) { @@ -231,6 +232,32 @@ void optc3_init_odm(struct timing_generator *optc) OPTC_MEM_SEL, 0); optc1->opp_count = 1; } +void optc31_set_static_screen_control( + struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t framecount; + uint32_t events; + + if (num_frames > 0xFF) + num_frames = 0xFF; + REG_GET_2(OTG_STATIC_SCREEN_CONTROL, + OTG_STATIC_SCREEN_EVENT_MASK, &events, + OTG_STATIC_SCREEN_FRAME_COUNT, &framecount); + + if (events == event_triggers && num_frames == framecount) + return; + if ((event_triggers & STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN) + != 0) + event_triggers = event_triggers & + ~STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN; + + REG_UPDATE_2(OTG_STATIC_SCREEN_CONTROL, + OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, + OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); +} static struct timing_generator_funcs dcn31_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -266,7 +293,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = { .set_drr = optc31_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, + .set_static_screen_control = optc31_set_static_screen_control, .program_stereo = optc1_program_stereo, .is_stereo_left_eye = optc1_is_stereo_left_eye, .tg_init = optc3_tg_init, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index 30b81a448ce2..5fc6c63580d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -263,5 +263,8 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc); void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); void optc3_init_odm(struct timing_generator *optc); - +void optc31_set_static_screen_control( + struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames); #endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 367cb6e6d074..0b769ee71405 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -96,6 +96,13 @@ static void dccg314_set_pixel_rate_div( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + // Don't program 0xF into the register field. Not valid since + // K1 / K2 field is only 1 / 2 bits wide + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); + return; + } + dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == cur_k1 && k2 == cur_k2) return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 588c1c71241f..a0741794db62 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index 5b6c2d94ec71..31feb4b0edee 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -66,9 +66,9 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .prepare_bandwidth = dcn20_prepare_bandwidth, .optimize_bandwidth = dcn20_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn31_set_drr, .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, + .set_static_screen_control = dcn31_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, .set_avmute = dcn30_set_avmute, .log_hw_state = dcn10_log_hw_state, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index f246aab23050..41edbd64ea21 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -228,7 +228,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .set_drr = optc31_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, + .set_static_screen_control = optc31_set_static_screen_control, .program_stereo = optc1_program_stereo, .is_stereo_left_eye = optc1_is_stereo_left_eye, .tg_init = optc3_tg_init, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 4fffc7bb8088..f9ea1e86707f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -886,6 +886,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, + .psr_skip_crtc_disable = true, .disable_dmcu = true, .force_abm_enable = false, .timing_trace = false, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4daed44ef5f..e4472c6be6c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div( // Don't program 0xF into the register field. Not valid since // K1 / K2 field is only 1 / 2 bits wide - if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); return; + } dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == cur_k1 && k2 == cur_k2) @@ -223,11 +225,7 @@ static void dccg32_set_dtbclk_dto( } else { REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], DTBCLK_DTO_ENABLE[params->otg_inst], 0, - PIPE_DTO_SRC_SEL[params->otg_inst], 1); - if (params->is_hdmi) - REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], - PIPE_DTO_SRC_SEL[params->otg_inst], 0); - + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c index 5947c2cb0f30..9501403a48a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c @@ -865,7 +865,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit); REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_chanage); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change); REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain); @@ -885,7 +885,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit); REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_chanage); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change); REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain); @@ -905,7 +905,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit); REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_chanage); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_change); REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, &s->usr_retrain); @@ -925,7 +925,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit); REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_chanage); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_change); REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, &s->usr_retrain); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 763311ffb967..b8767be1e4c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -227,8 +227,13 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + /* If PSR is supported on an eDP panel that's connected, but that panel is + * not in PSR at the time of trying to enter MALL SS, we have to include it + * in the static screen CAB calculation + */ if (!pipe->stream || !pipe->plane_state || - pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED || + (pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED && + pipe->stream->link->psr_settings.psr_allow_active) || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) continue; @@ -257,11 +262,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); - /* For DCC: - * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1) + /*For DCC: + * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1) */ if (pipe->plane_state->dcc.enable) - num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel + + num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel + (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES); bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; @@ -311,8 +316,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2; + break; } - break; } } @@ -698,11 +703,7 @@ void dcn32_subvp_update_force_pstate(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // For SubVP + DRR, also force disallow on the DRR pipe - // (We will force allow in the DMUB sequence -- some DRR timings by default won't allow P-State so we have - // to force once the vblank is stretched). - if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || - (pipe->stream->mall_stream_config.type == SUBVP_NONE && pipe->stream->ignore_msa_timing_param))) { + if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN)) { struct hubp *hubp = pipe->plane_res.hubp; if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) @@ -780,6 +781,10 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context) if (hws && hws->funcs.update_mall_sel) hws->funcs.update_mall_sel(dc, context); + //update subvp force pstate + if (hws && hws->funcs.subvp_update_force_pstate) + dc->hwseq->funcs.subvp_update_force_pstate(dc, context); + // Program FORCE_ONE_ROW_FOR_FRAME and CURSOR_REQ_MODE for main subvp pipes for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -1170,10 +1175,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index cdeff6de725d..e4dbc8353ea3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -723,7 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { /* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, - .allow_sw_cursor_fallback = false, + .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" .alloc_extra_way_for_cursor = true, .min_prefetch_in_strobe_ns = 60000, // 60us }; @@ -1721,8 +1721,29 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, return phantom_stream; } +void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i; + struct dc_plane_state *phantom_plane = NULL; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->top_pipe && !pipe->prev_odm_pipe && + pipe->plane_state && pipe->stream && + pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + phantom_plane = pipe->plane_state; + phantom_stream = pipe->stream; + + dc_plane_state_retain(phantom_plane); + dc_stream_retain(phantom_stream); + } + } +} + // return true if removed piped from ctx, false otherwise -bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context) +bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) { int i; bool removed_pipe = false; @@ -1749,14 +1770,23 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context) removed_pipe = true; } - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } + /* For non-full updates, a shallow copy of the current state + * is created. In this case we don't want to erase the current + * state (there can be 2 HIRQL threads, one in flip, and one in + * checkMPO) that can cause a race condition. + * + * This is just a workaround, needs a proper fix. + */ + if (!fast_update) { + // Clear all phantom stream info + if (pipe->stream) { + pipe->stream->mall_stream_config.type = SUBVP_NONE; + pipe->stream->mall_stream_config.paired_stream = NULL; + } - if (pipe->plane_state) { - pipe->plane_state->is_phantom = false; + if (pipe->plane_state) { + pipe->plane_state->is_phantom = false; + } } } return removed_pipe; @@ -1903,7 +1933,7 @@ int dcn32_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; if (context->stream_count == 1 && - context->stream_status[0].plane_count <= 1 && + context->stream_status[0].plane_count == 1 && !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) && is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) && pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ && @@ -1929,23 +1959,28 @@ int dcn32_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; - switch (pipe->stream->mall_stream_config.type) { - case SUBVP_MAIN: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; - subvp_in_use = true; - break; - case SUBVP_PHANTOM: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; - // Disallow unbounded req for SubVP according to DCHUB programming guide - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - break; - case SUBVP_NONE: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; - break; - default: - break; + /* Only populate DML input with subvp info for full updates. + * This is just a workaround -- needs a proper fix. + */ + if (!fast_validate) { + switch (pipe->stream->mall_stream_config.type) { + case SUBVP_MAIN: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; + break; + case SUBVP_PHANTOM: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; + // Disallow unbounded req for SubVP according to DCHUB programming guide + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + break; + case SUBVP_NONE: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; + break; + default: + break; + } } pipes[pipe_cnt].dout.dsc_input_bpc = 0; @@ -2033,6 +2068,9 @@ static struct resource_funcs dcn32_res_pool_funcs = { .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, .remove_phantom_pipes = dcn32_remove_phantom_pipes, + .retain_phantom_pipes = dcn32_retain_phantom_pipes, + .save_mall_state = dcn32_save_mall_state, + .restore_mall_state = dcn32_restore_mall_state, }; @@ -2124,6 +2162,7 @@ static bool dcn32_resource_construct( dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin dc->caps.max_slave_planes = 2; dc->caps.max_slave_yuv_planes = 2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index f6bc9bd5da31..13fbc574910b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -45,17 +45,6 @@ extern struct _vcs_dpi_ip_params_st dcn3_2_ip; extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; -/* Temp struct used to save and restore MALL config - * during validation. - * - * TODO: Move MALL config into dc_state instead of stream struct - * to avoid needing to save/restore. - */ -struct mall_temp_config { - struct mall_stream_config mall_stream_config[MAX_PIPES]; - bool is_phantom_plane[MAX_PIPES]; -}; - struct dcn32_resource_pool { struct resource_pool base; }; @@ -81,6 +70,9 @@ bool dcn32_release_post_bldn_3dlut( struct dc_transfer_func **shaper); bool dcn32_remove_phantom_pipes(struct dc *dc, + struct dc_state *context, bool fast_update); + +void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context); void dcn32_add_phantom_pipes(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index b03a7814e96d..783935c4e664 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -97,21 +97,21 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat * FLOOR(vp_x_start, blk_width) */ full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + - pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) + + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) - (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) - * FLOOR(vp_y_start, blk_height) */ full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y + - full_vp_height + mblk_height - 1) / mblk_height * mblk_height) + + full_vp_height + mblk_height - 1) / mblk_height * mblk_height) - (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height); /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ - mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) / mblk_height * mblk_height + mblk_height; /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; @@ -121,14 +121,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat */ num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); + + /*For DCC: + * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1) + */ + if (pipe->plane_state->dcc.enable) + num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel + + (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES); + bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned) cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2; - /* For DCC divide by 256 */ - if (pipe->plane_state->dcc.enable) - cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1; cache_lines_used += cache_lines_per_plane; } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 6c79a47b6336..d1f36df03c2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -721,7 +721,7 @@ static const struct dc_debug_options debug_defaults_drv = { /*must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, - .allow_sw_cursor_fallback = false, + .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" .alloc_extra_way_for_cursor = true, .min_prefetch_in_strobe_ns = 60000, // 60us }; @@ -743,7 +743,7 @@ static const struct dc_debug_options debug_defaults_diags = { .dmub_command_table = true, .enable_tri_buf = true, .use_max_lb = true, - .force_disable_subvp = true + .force_disable_subvp = true, }; @@ -1621,6 +1621,9 @@ static struct resource_funcs dcn321_res_pool_funcs = { .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, .remove_phantom_pipes = dcn32_remove_phantom_pipes, + .retain_phantom_pipes = dcn32_retain_phantom_pipes, + .save_mall_state = dcn32_save_mall_state, + .restore_mall_state = dcn32_restore_mall_state, }; @@ -1711,6 +1714,7 @@ static bool dcn321_resource_construct( dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 75dbb7ee193b..c26da3bb2892 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -565,7 +565,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dppclk_mhz = 847.06, .phyclk_mhz = 810.0, .socclk_mhz = 953.0, - .dscclk_mhz = 489.0, + .dscclk_mhz = 300.0, .dram_speed_mts = 2400.0, }, { @@ -576,7 +576,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dppclk_mhz = 960.00, .phyclk_mhz = 810.0, .socclk_mhz = 278.0, - .dscclk_mhz = 287.67, + .dscclk_mhz = 342.86, .dram_speed_mts = 2666.0, }, { @@ -587,7 +587,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dppclk_mhz = 1028.57, .phyclk_mhz = 810.0, .socclk_mhz = 715.0, - .dscclk_mhz = 318.334, + .dscclk_mhz = 369.23, .dram_speed_mts = 3200.0, }, { @@ -949,6 +949,7 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc int plane_count; int i; unsigned int optimized_min_dst_y_next_start_us; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > 1000.0; plane_count = 0; optimized_min_dst_y_next_start_us = 0; @@ -963,6 +964,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc * 2. single eDP, on link 0, 1 plane and stutter period > 5ms * Z10 only cases: * 1. single eDP, on link 0, 1 plane and stutter period >= 5ms + * Z8 cases: + * 1. stutter period sufficient * Zstate not allowed cases: * 1. Everything else */ @@ -990,11 +993,14 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000) return DCN_ZSTATE_SUPPORT_ALLOW; else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) - return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; else - return DCN_ZSTATE_SUPPORT_DISALLOW; - } else + return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW; + } else if (allow_z8) { + return DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; + } else { return DCN_ZSTATE_SUPPORT_DISALLOW; + } } void dcn20_calculate_dlg_params( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 479e2c1a1301..379729b02847 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -4851,7 +4851,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], - v->UrgentLatency, + v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 12b23bd50e19..b37d14369a62 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -559,6 +559,9 @@ void dcn31_calculate_wm_and_dlg_fp( context->bw_ctx.bw.dcn.clk.dramclk_khz = 0; context->bw_ctx.bw.dcn.clk.fclk_khz = 0; context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].stream) + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 4e45c6d9ecdc..ec351c8418cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -5082,7 +5082,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], - v->UrgentLatency, + v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 1dd51c4b6804..6a1cf6adea77 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -149,8 +149,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .num_states = 5, .sr_exit_time_us = 16.5, .sr_enter_plus_exit_time_us = 18.5, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, + .sr_exit_z8_time_us = 280.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 41f0b4c1c72f..950669f2c10d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -5179,7 +5179,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], - v->UrgentLatency, + v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 97b333b230d1..f94abd124021 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) @@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; @@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; @@ -256,16 +256,24 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, int vlevel) { const int max_latency_table_entries = 4; - const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; int dummy_latency_index = 0; + enum clock_change_support temp_clock_change_support = vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; dc_assert_fp_enabled(); while (dummy_latency_index < max_latency_table_entries) { + if (temp_clock_change_support != dm_dram_clock_change_unsupported) + vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + /* for subvp + DRR case, if subvp pipes are still present we support pstate */ + if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && + dcn32_subvp_in_use(dc, context)) + vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; + if (vlevel < context->bw_ctx.dml.vba.soc.num_states && vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) break; @@ -577,6 +585,9 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]; phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0; + /* dc->debug.subvp_extra_lines 0 by default*/ + phantom_vactive += dc->debug.subvp_extra_lines; + // For backporch of phantom pipe, use vstartup of the main pipe phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); @@ -1141,7 +1152,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == dm_prefetch_support_uclk_fclk_and_stutter) { context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; + dm_prefetch_support_fclk_and_stutter; /* There are params (such as FabricClock) that need to be recalculated * after validation fails (otherwise it will be 0). Calculation for * phantom vactive requires call into DML, so we must ensure all the @@ -1192,7 +1203,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) // remove phantom pipes and repopulate dml pipes if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context); + dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); @@ -1309,7 +1320,10 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + if (context->res_ctx.pipe_ctx[i].plane_state) + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + else + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; pipe_idx++; } @@ -1504,7 +1518,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, return false; // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context); + dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate); dc->res_pool->funcs->update_soc_for_wm_a(dc, context); @@ -1756,6 +1770,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, if (repopulate_pipes) { int flag_max_mpc_comb = vba->maxMpcComb; int flag_vlevel = vlevel; + int i; pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); @@ -1774,7 +1789,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, flag_max_mpc_comb != context->bw_ctx.dml.vba.maxMpcComb) { /* check the context constructed with pipe split flags is still valid*/ bool flags_valid = false; - for (int i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) { + for (i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) { if (vba->ModeSupport[i][flag_max_mpc_comb]) { vba->maxMpcComb = flag_max_mpc_comb; vba->VoltageLevel = i; @@ -1816,14 +1831,38 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int dummy_latency_index = 0; int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + bool subvp_in_use = dcn32_subvp_in_use(dc, context); unsigned int min_dram_speed_mts_margin; + bool need_fclk_lat_as_dummy = false; + bool is_subvp_p_drr = true; dc_assert_fp_enabled(); - // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK - if (!pstate_en && dcn32_subvp_in_use(dc, context)) { - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; - pstate_en = true; + /* need to find dummy latency index for subvp */ + if (subvp_in_use) { + /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ + if (!pstate_en) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + pstate_en = true; + is_subvp_p_drr = true; + } + dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc, + context, pipes, pipe_cnt, vlevel); + + /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so prefetch is + * scheduled correctly to account for dummy pstate. + */ + if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) { + need_fclk_lat_as_dummy = true; + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + } + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + if (is_subvp_p_drr) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + } } context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; @@ -1847,9 +1886,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so * prefetch is scheduled correctly to account for dummy pstate. */ - if (dummy_latency_index == 0) + if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) { + need_fclk_lat_as_dummy = true; context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + } dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; @@ -1951,13 +1992,13 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == dm_dram_clock_change_unsupported) { - int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1; min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; } - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { /* find largest table entry that is lower than dram speed, * but lower than DPM0 still uses DPM0 */ @@ -2037,7 +2078,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0) + /* for proper prefetch calculations, if dummy lat > fclk lat, use fclk lat = dummy lat */ + if (need_fclk_lat_as_dummy) context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; @@ -2050,10 +2092,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); - if (dummy_latency_index == 0) - context->bw_ctx.dml.soc.fclk_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; } + + /* revert fclk lat changes if required */ + if (need_fclk_lat_as_dummy) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; } static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index e5c8f6a71b5b..4b8f5fa0f0ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -670,6 +670,25 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; } + v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->UrgentLatency, + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + mode_lib->vba.DETBufferSizeY, + mode_lib->vba.DETBufferSizeC, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] && !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ? @@ -1665,6 +1684,8 @@ static void mode_support_configuration(struct vba_vars_st *v, && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true && mode_lib->vba.NonsupportedDSCInputBPC == false && !mode_lib->vba.ExceededMALLSize + && (mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false + || i == v->soc.num_states - 1) && ((mode_lib->vba.HostVMEnable == false && !mode_lib->vba.ImmediateFlipRequiredFinal) || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) @@ -2286,7 +2307,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0 || mode_lib->vba.Output[k] == dm_edp || mode_lib->vba.Output[k] == dm_hdmi) - && mode_lib->vba.OutputBppPerState[i][k] == 0 && (mode_lib->vba.UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)) { + && mode_lib->vba.OutputBppPerState[i][k] == 0) { mode_lib->vba.LinkCapacitySupport[i] = false; } } @@ -3158,6 +3179,25 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); + mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.UrgLatency[i], + mode_lib->vba.SwathHeightYThisState, + mode_lib->vba.SwathHeightCThisState, + mode_lib->vba.swath_width_luma_ub_this_state, + mode_lib->vba.swath_width_chroma_ub_this_state, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.DETBufferSizeYThisState, + mode_lib->vba.DETBufferSizeCThisState, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index debe46b24a3e..5af601cff1a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6228,3 +6228,72 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; } + +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]) +{ + int k; + double SwathSizeAllSurfaces = 0; + double SwathSizeAllSurfacesInFetchTimeUs; + double DETSwathLatencyHidingUs; + double DETSwathLatencyHidingYUs; + double DETSwathLatencyHidingCUs; + double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; + double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; + bool NotEnoughDETSwathFillLatencyHiding = false; + + /* calculate sum of single swath size for all pipes in bytes*/ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + + if (SwathHeightC[k] != 0) + SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + else + SwathSizePerSurfaceC[k] = 0; + + SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; + } + + SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; + + /* ensure all DET - 1 swath can hide a fetch for all surfaces */ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + double LineTime = HTotal[k] / PixelClock[k]; + + /* only care if surface is not phantom */ + if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; + + if (SwathHeightC[k] != 0) { + DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; + + DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); + } else { + DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; + } + + /* DET must be able to hide time to fetch 1 swath for each surface */ + if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { + NotEnoughDETSwathFillLatencyHiding = true; + break; + } + } + } + + return NotEnoughDETSwathFillLatencyHiding; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 3989c2a28fae..779c6805f599 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -1141,4 +1141,22 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf double *FractionOfUrgentBandwidth, bool *ImmediateFlipBandwidthSupport); +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 432b4ecd01a7..f4b176599be7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -126,9 +126,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 9.35, - .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, - .urgent_latency_vm_data_only_us = 9.35, + .urgent_latency_pixel_data_only_us = 4, + .urgent_latency_pixel_mixed_with_vm_data_us = 4, + .urgent_latency_vm_data_only_us = 4, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, @@ -156,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index d46aa4817e70..81e53e67cd0b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -1050,6 +1050,7 @@ struct vba_vars_st { double MinFullDETBufferingTime; double AverageReadBandwidthGBytePerSecond; bool FirstMainPlane; + bool NotEnoughDETSwathFillLatencyHiding; unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX]; unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; @@ -1162,7 +1163,7 @@ struct vba_vars_st { double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX]; double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX]; bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX]; - bool LinkCapacitySupport[DC__NUM_DPP__MAX]; + bool LinkCapacitySupport[DC__VOLTAGE_STATES]; bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX]; unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX]; unsigned int VFrontPorch[DC__NUM_DPP__MAX]; @@ -1233,6 +1234,7 @@ struct vba_vars_st { unsigned int BlockWidthC[DC__NUM_DPP__MAX]; unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX]; bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2]; + bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2]; struct dummy_vars dummy_vars; }; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c index 0ea52ba5ac82..9fd8b269dd79 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c @@ -256,8 +256,8 @@ static const struct hw_factory_funcs funcs = { */ void dal_hw_factory_dcn32_init(struct hw_factory *factory) { - factory->number_of_pins[GPIO_ID_DDC_DATA] = 6; - factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 6; + factory->number_of_pins[GPIO_ID_DDC_DATA] = 8; + factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 8; factory->number_of_pins[GPIO_ID_GENERIC] = 4; factory->number_of_pins[GPIO_ID_HPD] = 5; factory->number_of_pins[GPIO_ID_GPIO_PAD] = 28; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 85495ef026f5..525f8f0b8732 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -240,8 +240,11 @@ struct resource_funcs { unsigned int pipe_cnt, unsigned int index); - bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context); + bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update); + void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context); void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); + void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); + void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); }; struct audio_support{ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index f2e1fcb668fb..5b0265c0df61 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -46,7 +46,7 @@ struct dcn_hubbub_wm_set { uint32_t pte_meta_urgent; uint32_t sr_enter; uint32_t sr_exit; - uint32_t dram_clk_chanage; + uint32_t dram_clk_change; uint32_t usr_retrain; uint32_t fclk_pstate_change; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index dcb80c4747b0..131fcfa28bca 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -83,10 +83,15 @@ static const struct dpp_input_csc_matrix __maybe_unused dpp_input_csc_matrix[] = {COLOR_SPACE_YCBCR709, {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, 0x2000, 0x3b61, 0xe24f} }, - {COLOR_SPACE_YCBCR709_LIMITED, {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, - 0x2568, 0x43ee, 0xdbb2} } + 0x2568, 0x43ee, 0xdbb2} }, + {COLOR_SPACE_2020_YCBCR, + {0x2F30, 0x2000, 0, 0xE869, 0xEDB7, 0x2000, 0xFABC, 0xBC6, 0, + 0x2000, 0x3C34, 0xE1E6} }, + {COLOR_SPACE_2020_RGB_LIMITEDRANGE, + {0x35E0, 0x255F, 0, 0xE2B3, 0xEB20, 0x255F, 0xF9FD, 0xB1E, 0, + 0x255F, 0x44BD, 0xDB43} } }; struct dpp_grph_csc_adjustment { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c new file mode 100644 index 000000000000..801a95b34e8c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c @@ -0,0 +1,28 @@ + +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +/*********************************************************************/ +// USB4 DPIA BANDWIDTH ALLOCATION LOGIC +/*********************************************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h new file mode 100644 index 000000000000..669e995f825f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h @@ -0,0 +1,69 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef DC_INC_LINK_DP_DPIA_BW_H_ +#define DC_INC_LINK_DP_DPIA_BW_H_ + +// XXX: TODO: Re-add for Phase 2 +/* Number of Host Routers per motherboard is 2 and 2 DPIA per host router */ +#define MAX_HR_NUM 2 + +struct dc_host_router_bw_alloc { + int max_bw[MAX_HR_NUM]; // The Max BW that each Host Router has available to be shared btw DPIAs + int total_estimated_bw[MAX_HR_NUM]; // The Total Verified and available BW that Host Router has +}; + +/* + * Enable BW Allocation Mode Support from the DP-Tx side + * + * @link: pointer to the dc_link struct instance + * + * return: SUCCESS or FAILURE + */ +bool set_dptx_usb4_bw_alloc_support(struct dc_link *link); + +/* + * Send a request from DP-Tx requesting to allocate BW remotely after + * allocating it locally. This will get processed by CM and a CB function + * will be called. + * + * @link: pointer to the dc_link struct instance + * @req_bw: The requested bw in Kbyte to allocated + * + * return: none + */ +void set_usb4_req_bw_req(struct dc_link *link, int req_bw); + +/* + * CB function for when the status of the Req above is complete. We will + * find out the result of allocating on CM and update structs accordingly + * + * @link: pointer to the dc_link struct instance + * + * return: none + */ +void get_usb4_req_bw_resp(struct dc_link *link); + +#endif /* DC_INC_LINK_DP_DPIA_BW_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 795d8811af9a..33907feefebb 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -1029,13 +1029,14 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 { uint16_t vtotal; uint16_t htotal; uint8_t vblank_pipe_index; - uint8_t padding[2]; + uint8_t padding[1]; struct { uint8_t drr_in_use; uint8_t drr_window_size_ms; // Indicates largest VMIN/VMAX adjustment per frame uint16_t min_vtotal_supported; // Min VTOTAL that supports switching in VBLANK uint16_t max_vtotal_supported; // Max VTOTAL that can support SubVP static scheduling uint8_t use_ramping; // Use ramping or not + uint8_t drr_vblank_start_margin; } drr_info; // DRR considered as part of SubVP + VBLANK case } vblank_data; } pipe_config; diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 235259d6c5a1..9b5d9b2c9a6a 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -102,9 +102,18 @@ static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xcccc, 0xcccc}, }; +static const struct abm_parameters abm_settings_config2[abm_defines_max_level] = { +// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blRed blStart + {0xf0, 0xbf, 0x20, 0x00, 0x88, 0x99, 0xb3, 0x40, 0xe0, 0x0000, 0xcccc}, + {0xd8, 0x85, 0x20, 0x00, 0x70, 0x90, 0xa8, 0x40, 0xc8, 0x0700, 0xb333}, + {0xb8, 0x58, 0x20, 0x00, 0x64, 0x88, 0x78, 0x70, 0xa0, 0x7000, 0x9999}, + {0x82, 0x40, 0x20, 0x00, 0x00, 0xb8, 0xb3, 0x70, 0x70, 0xc333, 0xb333}, +}; + static const struct abm_parameters * const abm_settings[] = { abm_settings_config0, abm_settings_config1, + abm_settings_config2, }; #define NUM_AMBI_LEVEL 5 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h index 483769fb1736..537aee0536d3 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT #ifndef _dcn_3_0_0_OFFSET_HEADER #define _dcn_3_0_0_OFFSET_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h index b79be3a25a80..f9d90b098519 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT #ifndef _dcn_3_0_0_SH_MASK_HEADER #define _dcn_3_0_0_SH_MASK_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h index 363d2139cea2..db7e22720d00 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h @@ -993,7 +993,8 @@ #define mmUVD_RAS_VCPU_VCODEC_STATUS_BASE_IDX 1 #define mmUVD_RAS_MMSCH_FATAL_ERROR 0x0058 #define mmUVD_RAS_MMSCH_FATAL_ERROR_BASE_IDX 1 - +#define mmVCN_RAS_CNTL 0x04b9 +#define mmVCN_RAS_CNTL_BASE_IDX 1 /* JPEG 2_6_0 regs */ #define mmUVD_RAS_JPEG0_STATUS 0x0059 diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h index 8de883b76d90..874a8b7e1feb 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h @@ -3618,6 +3618,33 @@ #define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF_MASK 0x7FFFFFFFL #define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF_MASK 0x80000000L +//VCN 2_6_0 VCN_RAS_CNTL +#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN__SHIFT 0x0 +#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN__SHIFT 0x1 +#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN__SHIFT 0x4 +#define VCN_RAS_CNTL__MMSCH_PMI_EN__SHIFT 0x5 +#define VCN_RAS_CNTL__VCPU_VCODEC_REARM__SHIFT 0x8 +#define VCN_RAS_CNTL__MMSCH_REARM__SHIFT 0x9 +#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN__SHIFT 0xc +#define VCN_RAS_CNTL__VCPU_VCODEC_READY__SHIFT 0x10 +#define VCN_RAS_CNTL__MMSCH_READY__SHIFT 0x11 +#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK 0x00000001L +#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN_MASK 0x00000002L +#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK 0x00000010L +#define VCN_RAS_CNTL__MMSCH_PMI_EN_MASK 0x00000020L +#define VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK 0x00000100L +#define VCN_RAS_CNTL__MMSCH_REARM_MASK 0x00000200L +#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK 0x00001000L +#define VCN_RAS_CNTL__VCPU_VCODEC_READY_MASK 0x00010000L +#define VCN_RAS_CNTL__MMSCH_READY_MASK 0x00020000L + +//VCN 2_6_0 UVD_VCPU_INT_EN +#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN__SHIFT 0x16 +#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK 0x00400000L + +//VCN 2_6_0 UVD_SYS_INT_EN +#define UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK 0x04000000L + /* JPEG 2_6_0 UVD_RAS_JPEG0_STATUS */ #define UVD_RAS_JPEG0_STATUS__POISONED_VF__SHIFT 0x0 #define UVD_RAS_JPEG0_STATUS__POISONED_PF__SHIFT 0x1f diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 4dc738c51771..b78360a71bc9 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -9292,7 +9292,7 @@ typedef struct { typedef struct { VFCT_IMAGE_HEADER VbiosHeader; - UCHAR VbiosContent[1]; + UCHAR VbiosContent[]; }GOP_VBIOS_CONTENT; typedef struct { diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 7e85cdc5bd34..dc694cb246d9 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -222,7 +222,11 @@ union MESAPI_SET_HW_RESOURCES { uint32_t apply_grbm_remote_register_dummy_read_wa : 1; uint32_t second_gfx_pipe_enabled : 1; uint32_t enable_level_process_quantum_check : 1; - uint32_t reserved : 25; + uint32_t legacy_sch_mode : 1; + uint32_t disable_add_queue_wptr_mc_addr : 1; + uint32_t enable_mes_event_int_logging : 1; + uint32_t enable_reg_active_poll : 1; + uint32_t reserved : 21; }; uint32_t uint32_t_all; }; diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h b/drivers/gpu/drm/amd/include/yellow_carp_offset.h index 28a56b56bcaf..0fea6a746611 100644 --- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h +++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT #ifndef YELLOW_CARP_OFFSET_H #define YELLOW_CARP_OFFSET_H diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 20e5f66f853f..ca3beb5d8f27 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -161,7 +161,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu, int smu_set_gfx_power_up_by_imu(struct smu_context *smu) { - if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu) + if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu) return -EOPNOTSUPP; return smu->ppt_funcs->set_gfx_power_up_by_imu(smu); @@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) yellow_carp_set_ppt_funcs(smu); break; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): smu_v13_0_4_set_ppt_funcs(smu); break; case IP_VERSION(13, 0, 5): diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 44bbf17e4bef..3bc4128a22ac 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -168,6 +168,7 @@ struct smu_temperature_range { int mem_crit_max; int mem_emergency_max; int software_shutdown_temp; + int software_shutdown_temp_offset; }; struct smu_state_validation_block { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index b76f0f7e4299..d6b964cf73bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -522,9 +522,9 @@ typedef enum { TEMP_HOTSPOT_M, TEMP_MEM, TEMP_VR_GFX, + TEMP_VR_SOC, TEMP_VR_MEM0, TEMP_VR_MEM1, - TEMP_VR_SOC, TEMP_VR_U, TEMP_LIQUID0, TEMP_LIQUID1, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 25c08f963f49..d6b13933a98f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2C +#define SMU13_DRIVER_IF_VERSION 0x35 //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x20 +#define PPTABLE_VERSION 0x27 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -96,7 +96,7 @@ #define FEATURE_MEM_TEMP_READ_BIT 47 #define FEATURE_ATHUB_MMHUB_PG_BIT 48 #define FEATURE_SOC_PCC_BIT 49 -#define FEATURE_SPARE_50_BIT 50 +#define FEATURE_EDC_PWRBRK_BIT 50 #define FEATURE_SPARE_51_BIT 51 #define FEATURE_SPARE_52_BIT 52 #define FEATURE_SPARE_53_BIT 53 @@ -282,15 +282,15 @@ typedef enum { } I2cControllerPort_e; typedef enum { - I2C_CONTROLLER_NAME_VR_GFX = 0, - I2C_CONTROLLER_NAME_VR_SOC, - I2C_CONTROLLER_NAME_VR_VMEMP, - I2C_CONTROLLER_NAME_VR_VDDIO, - I2C_CONTROLLER_NAME_LIQUID0, - I2C_CONTROLLER_NAME_LIQUID1, - I2C_CONTROLLER_NAME_PLX, - I2C_CONTROLLER_NAME_OTHER, - I2C_CONTROLLER_NAME_COUNT, + I2C_CONTROLLER_NAME_VR_GFX = 0, + I2C_CONTROLLER_NAME_VR_SOC, + I2C_CONTROLLER_NAME_VR_VMEMP, + I2C_CONTROLLER_NAME_VR_VDDIO, + I2C_CONTROLLER_NAME_LIQUID0, + I2C_CONTROLLER_NAME_LIQUID1, + I2C_CONTROLLER_NAME_PLX, + I2C_CONTROLLER_NAME_FAN_INTAKE, + I2C_CONTROLLER_NAME_COUNT, } I2cControllerName_e; typedef enum { @@ -302,6 +302,7 @@ typedef enum { I2C_CONTROLLER_THROTTLER_LIQUID0, I2C_CONTROLLER_THROTTLER_LIQUID1, I2C_CONTROLLER_THROTTLER_PLX, + I2C_CONTROLLER_THROTTLER_FAN_INTAKE, I2C_CONTROLLER_THROTTLER_INA3221, I2C_CONTROLLER_THROTTLER_COUNT, } I2cControllerThrottler_e; @@ -309,8 +310,9 @@ typedef enum { typedef enum { I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5, I2C_CONTROLLER_PROTOCOL_VR_IR35217, - I2C_CONTROLLER_PROTOCOL_TMP_TMP102A, + I2C_CONTROLLER_PROTOCOL_TMP_MAX31875, I2C_CONTROLLER_PROTOCOL_INA3221, + I2C_CONTROLLER_PROTOCOL_TMP_MAX6604, I2C_CONTROLLER_PROTOCOL_COUNT, } I2cControllerProtocol_e; @@ -690,6 +692,9 @@ typedef struct { #define PP_OD_FEATURE_UCLK_BIT 8 #define PP_OD_FEATURE_ZERO_FAN_BIT 9 #define PP_OD_FEATURE_TEMPERATURE_BIT 10 +#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11 +#define PP_OD_FEATURE_ASIC_TDC_BIT 12 +#define PP_OD_FEATURE_COUNT 13 typedef enum { PP_OD_POWER_FEATURE_ALWAYS_ENABLED, @@ -697,6 +702,11 @@ typedef enum { PP_OD_POWER_FEATURE_ALWAYS_DISABLED, } PP_OD_POWER_FEATURE_e; +typedef enum { + FAN_MODE_AUTO = 0, + FAN_MODE_MANUAL_LINEAR, +} FanMode_e; + typedef struct { uint32_t FeatureCtrlMask; @@ -708,8 +718,8 @@ typedef struct { uint8_t RuntimePwrSavingFeaturesCtrl; //Frequency changes - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -730,7 +740,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround } OverDriveTable_t; @@ -748,8 +763,8 @@ typedef struct { uint8_t IdlePwrSavingFeaturesCtrl; uint8_t RuntimePwrSavingFeaturesCtrl; - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -769,7 +784,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; } OverDriveLimits_t; @@ -903,7 +923,8 @@ typedef struct { uint16_t FanStartTempMin; uint16_t FanStartTempMax; - uint32_t Spare[12]; + uint16_t PowerMinPpt0[POWER_SOURCE_COUNT]; + uint32_t Spare[11]; } MsgLimits_t; @@ -1086,11 +1107,13 @@ typedef struct { uint32_t GfxoffSpare[15]; // GFX GPO - float DfllBtcMasterScalerM; + uint32_t DfllBtcMasterScalerM; int32_t DfllBtcMasterScalerB; - float DfllBtcSlaveScalerM; + uint32_t DfllBtcSlaveScalerM; int32_t DfllBtcSlaveScalerB; - uint32_t GfxGpoSpare[12]; + uint32_t DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg + uint32_t DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg + uint32_t GfxGpoSpare[10]; // GFX DCS @@ -1106,7 +1129,10 @@ typedef struct { uint16_t DcsTimeout; //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin. - uint32_t DcsSpare[16]; + uint32_t DcsSpare[14]; + + // UCLK section + uint16_t ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS]; // In MHz // UCLK section uint8_t UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations @@ -1163,13 +1189,14 @@ typedef struct { uint16_t IntakeTempHighIntakeAcousticLimit; uint16_t IntakeTempAcouticLimitReleaseRate; - uint16_t FanStalledTempLimitOffset; + int16_t FanAbnormalTempLimitOffset; uint16_t FanStalledTriggerRpm; - uint16_t FanAbnormalTriggerRpm; - uint16_t FanPadding; - - uint32_t FanSpare[14]; + uint16_t FanAbnormalTriggerRpmCoeff; + uint16_t FanAbnormalDetectionEnable; + uint8_t FanIntakeSensorSupport; + uint8_t FanIntakePadding[3]; + uint32_t FanSpare[13]; // SECTION: VDD_GFX AVFS uint8_t OverrideGfxAvfsFuses; @@ -1193,7 +1220,6 @@ typedef struct { uint32_t dGbV_dT_vmin; uint32_t dGbV_dT_vmax; - //Unused: PMFW-9370 uint32_t V2F_vmin_range_low; uint32_t V2F_vmin_range_high; uint32_t V2F_vmax_range_low; @@ -1238,8 +1264,21 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + + QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT]; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[43]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1304,7 +1343,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; // Q4.4 + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1317,11 +1357,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1423,8 +1459,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t AvgTemperatureFanIntake; uint8_t PcieRate ; uint8_t PcieWidth ; @@ -1592,5 +1631,7 @@ typedef struct { #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0 0x5 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3 0x6 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7 +#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 +#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index 9ebb8f39732a..8b8266890a10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -131,7 +131,13 @@ #define PPSMC_MSG_EnableAudioStutterWA 0x44 #define PPSMC_MSG_PowerUpUmsch 0x45 #define PPSMC_MSG_PowerDownUmsch 0x46 -#define PPSMC_Message_Count 0x47 +#define PPSMC_MSG_SetDcsArch 0x47 +#define PPSMC_MSG_TriggerVFFLR 0x48 +#define PPSMC_MSG_SetNumBadMemoryPagesRetired 0x49 +#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A +#define PPSMC_MSG_SetPriorityDeltaGain 0x4B +#define PPSMC_MSG_AllowIHHostInterrupt 0x4C +#define PPSMC_Message_Count 0x4D //Debug Dump Message #define DEBUGSMC_MSG_TestMessage 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 58098b82df66..4180c71d930f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -239,7 +239,10 @@ __SMU_DUMMY_MAP(DriverMode2Reset), \ __SMU_DUMMY_MAP(GetGfxOffStatus), \ __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ - __SMU_DUMMY_MAP(LogGfxOffResidency), + __SMU_DUMMY_MAP(LogGfxOffResidency), \ + __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), \ + __SMU_DUMMY_MAP(AllowGpo), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index b7f4569aff2a..e8c6febb8b64 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,10 +28,11 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms @@ -272,6 +273,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu); int smu_v13_0_run_btc(struct smu_context *smu); +int smu_v13_0_gpo_control(struct smu_context *smu, + bool enablement); + int smu_v13_0_deep_sleep_control(struct smu_context *smu, bool enablement); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 70b560737687..ad66d57aa102 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -79,6 +79,17 @@ MODULE_FIRMWARE("amdgpu/beige_goby_smc.bin"); #define mmTHM_BACO_CNTL_ARCT 0xA7 #define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0 +static void smu_v11_0_poll_baco_exit(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t data, loop = 0; + + do { + usleep_range(1000, 1100); + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + } while ((data & 0x100) && (++loop < 100)); +} + int smu_v11_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1588,6 +1599,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu) if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) return false; + /* return true if ASIC is in BACO state already */ + if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + /* Arcturus does not support this bit mask */ if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) @@ -1685,7 +1700,18 @@ int smu_v11_0_baco_enter(struct smu_context *smu) int smu_v11_0_baco_exit(struct smu_context *smu) { - return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); + int ret; + + ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); + if (!ret) { + /* + * Poll BACO exit status to ensure FW has completed + * BACO exit process to avoid timing issues. + */ + smu_v11_0_poll_baco_exit(smu); + } + + return ret; } int smu_v11_0_mode1_reset(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 89f0f6eb19f3..e54b760b875b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): mp1_fw_flags = RREG32_PCIE(MP1_Public | (smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff)); break; @@ -289,6 +290,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE; break; case IP_VERSION(13, 0, 0): + smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0; + break; case IP_VERSION(13, 0, 10): smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10; break; @@ -301,6 +304,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP; break; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4; break; case IP_VERSION(13, 0, 5): @@ -841,6 +845,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return 0; if (enable) @@ -1376,6 +1381,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, */ uint32_t ctxid = entry->src_data[0]; uint32_t data; + uint32_t high; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1432,6 +1438,36 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, schedule_work(&smu->throttling_logging_work); break; + case 0x8: + high = smu->thermal_range.software_shutdown_temp + + smu->thermal_range.software_shutdown_temp_offset; + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + high); + dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", + high, + smu->thermal_range.software_shutdown_temp_offset); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + case 0x9: + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; } } } @@ -2146,6 +2182,21 @@ int smu_v13_0_run_btc(struct smu_context *smu) return res; } +int smu_v13_0_gpo_control(struct smu_context *smu, + bool enablement) +{ + int res; + + res = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_AllowGpo, + enablement ? 1 : 0, + NULL); + if (res) + dev_err(smu->adev->dev, "SetGpoAllow %d failed!\n", enablement); + + return res; +} + int smu_v13_0_deep_sleep_control(struct smu_context *smu, bool enablement) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 5bcb61f77e41..9643b21c636a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -141,6 +141,10 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), + MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, + PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -207,6 +211,8 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, }; static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { @@ -537,6 +543,23 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu) dpm_table); if (ret) return ret; + + /* + * Update the reported maximum shader clock to the value + * which can be guarded to be achieved on all cards. This + * is aligned with Window setting. And considering that value + * might be not the peak frequency the card can achieve, it + * is normal some real-time clock frequency can overtake this + * labelled maximum clock frequency(for example in pp_dpm_sclk + * sysfs output). + */ + if (skutable->DriverReportedClocks.GameClockAc && + (dpm_table->dpm_levels[dpm_table->count - 1].value > + skutable->DriverReportedClocks.GameClockAc)) { + dpm_table->dpm_levels[dpm_table->count - 1].value = + skutable->DriverReportedClocks.GameClockAc; + dpm_table->max = skutable->DriverReportedClocks.GameClockAc; + } } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; @@ -799,6 +822,57 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu, return ret; } +static int smu_v13_0_0_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + /* uclk dpm table */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + /* gfxclk dpm table */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + /* socclk dpm table */ + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + /* fclk dpm table */ + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + case SMU_VCLK1: + /* vclk dpm table */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + case SMU_DCLK1: + /* dclk dpm table */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + dev_err(smu->adev->dev, "Unsupported clock type!\n"); + return -EINVAL; + } + + if (min) + *min = dpm_table->min; + if (max) + *max = dpm_table->max; + + return 0; +} + static int smu_v13_0_0_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, @@ -1301,9 +1375,17 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.fclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + DriverReportedClocks_t driver_clocks = + pptable->SkuTable.DriverReportedClocks; pstate_table->gfxclk_pstate.min = gfx_table->min; - pstate_table->gfxclk_pstate.peak = gfx_table->max; + if (driver_clocks.GameClockAc && + (driver_clocks.GameClockAc < gfx_table->max)) + pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc; + else + pstate_table->gfxclk_pstate.peak = gfx_table->max; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; @@ -1320,12 +1402,12 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu) pstate_table->fclk_pstate.min = fclk_table->min; pstate_table->fclk_pstate.peak = fclk_table->max; - /* - * For now, just use the mininum clock frequency. - * TODO: update them when the real pstate settings available - */ - pstate_table->gfxclk_pstate.standard = gfx_table->min; - pstate_table->uclk_pstate.standard = mem_table->min; + if (driver_clocks.BaseClockAc && + driver_clocks.BaseClockAc < gfx_table->max) + pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc; + else + pstate_table->gfxclk_pstate.standard = gfx_table->max; + pstate_table->uclk_pstate.standard = mem_table->max; pstate_table->socclk_pstate.standard = soc_table->min; pstate_table->vclk_pstate.standard = vclk_table->min; pstate_table->dclk_pstate.standard = dclk_table->min; @@ -1359,12 +1441,23 @@ out: static int smu_v13_0_0_get_fan_speed_pwm(struct smu_context *smu, uint32_t *speed) { + int ret; + if (!speed) return -EINVAL; - return smu_v13_0_0_get_smu_metrics_data(smu, - METRICS_CURR_FANPWM, - speed); + ret = smu_v13_0_0_get_smu_metrics_data(smu, + METRICS_CURR_FANPWM, + speed); + if (ret) { + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); + return ret; + } + + /* Convert the PMFW output which is in percent to pwm(255) based */ + *speed = MIN(*speed * 255 / 100, 255); + + return 0; } static int smu_v13_0_0_get_fan_speed_rpm(struct smu_context *smu, @@ -1838,6 +1931,40 @@ static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54); } +static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad page number on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetNumBadMemoryPagesRetired, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages number\n", + __func__); + + return ret; +} + +static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad channel info on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages channel info\n", + __func__); + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1862,7 +1989,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_enabled_mask = smu_cmn_get_enabled_mask, .dpm_set_vcn_enable = smu_v13_0_set_vcn_enable, .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable, - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = smu_v13_0_0_get_dpm_ultimate_freq, .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, .read_sensor = smu_v13_0_0_read_sensor, .feature_is_enabled = smu_cmn_feature_is_enabled, @@ -1908,6 +2035,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .mode1_reset = smu_v13_0_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, .set_df_cstate = smu_v13_0_0_set_df_cstate, + .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, + .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, + .gpo_control = smu_v13_0_gpo_control, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 97e1d55dcaad..8fa9a36c38b6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = { .set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu, }; +static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); +} + void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) smu->feature_map = smu_v13_0_4_feature_mask_map; smu->table_map = smu_v13_0_4_table_map; smu->is_apu = true; - smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); - smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); - smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4)) + smu_v13_0_4_set_smu_mailbox_registers(smu); + else + smu_v13_0_set_smu_mailbox_registers(smu); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index d74debc584f8..5c6c6ad011ca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -123,6 +123,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -189,6 +190,8 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, }; static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { @@ -1223,6 +1226,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } @@ -1359,12 +1363,23 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) static int smu_v13_0_7_get_fan_speed_pwm(struct smu_context *smu, uint32_t *speed) { + int ret; + if (!speed) return -EINVAL; - return smu_v13_0_7_get_smu_metrics_data(smu, - METRICS_CURR_FANPWM, - speed); + ret = smu_v13_0_7_get_smu_metrics_data(smu, + METRICS_CURR_FANPWM, + speed); + if (ret) { + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); + return ret; + } + + /* Convert the PMFW output which is in percent to pwm(255) based */ + *speed = MIN(*speed * 255 / 100, 255); + + return 0; } static int smu_v13_0_7_get_fan_speed_rpm(struct smu_context *smu, @@ -1436,7 +1451,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf) { - DpmActivityMonitorCoeffIntExternal_t activity_monitor_external[PP_SMC_POWER_PROFILE_COUNT]; + DpmActivityMonitorCoeffIntExternal_t *activity_monitor_external; uint32_t i, j, size = 0; int16_t workload_type = 0; int result = 0; @@ -1444,6 +1459,12 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf if (!buf) return -EINVAL; + activity_monitor_external = kcalloc(PP_SMC_POWER_PROFILE_COUNT, + sizeof(*activity_monitor_external), + GFP_KERNEL); + if (!activity_monitor_external) + return -ENOMEM; + size += sysfs_emit_at(buf, size, " "); for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++) size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i], @@ -1456,15 +1477,17 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, i); - if (workload_type < 0) - return -EINVAL; + if (workload_type < 0) { + result = -EINVAL; + goto out; + } result = smu_cmn_update_table(smu, SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type, (void *)(&activity_monitor_external[i]), false); if (result) { dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return result; + goto out; } } @@ -1492,7 +1515,10 @@ do { \ PRINT_DPM_MONITOR(Fclk_BoosterFreq); #undef PRINT_DPM_MONITOR - return size; + result = size; +out: + kfree(activity_monitor_external); + return result; } static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) @@ -1687,6 +1713,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, .set_df_cstate = smu_v13_0_7_set_df_cstate, + .gpo_control = smu_v13_0_gpo_control, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 5be6562c2a19..6a614e54b383 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -105,6 +105,7 @@ struct ps8640 { struct gpio_desc *gpio_powerdown; struct device_link *link; bool pre_enabled; + bool need_post_hpd_delay; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -173,14 +174,31 @@ static int _ps8640_wait_hpd_asserted(struct ps8640 *ps_bridge, unsigned long wai { struct regmap *map = ps_bridge->regmap[PAGE2_TOP_CNTL]; int status; + int ret; /* * Apparently something about the firmware in the chip signals that * HPD goes high by reporting GPIO9 as high (even though HPD isn't * actually connected to GPIO9). */ - return regmap_read_poll_timeout(map, PAGE2_GPIO_H, status, - status & PS_GPIO9, wait_us / 10, wait_us); + ret = regmap_read_poll_timeout(map, PAGE2_GPIO_H, status, + status & PS_GPIO9, wait_us / 10, wait_us); + + /* + * The first time we see HPD go high after a reset we delay an extra + * 50 ms. The best guess is that the MCU is doing "stuff" during this + * time (maybe talking to the panel) and we don't want to interrupt it. + * + * No locking is done around "need_post_hpd_delay". If we're here we + * know we're holding a PM Runtime reference and the only other place + * that touches this is PM Runtime resume. + */ + if (!ret && ps_bridge->need_post_hpd_delay) { + ps_bridge->need_post_hpd_delay = false; + msleep(50); + } + + return ret; } static int ps8640_wait_hpd_asserted(struct drm_dp_aux *aux, unsigned long wait_us) @@ -388,6 +406,9 @@ static int __maybe_unused ps8640_resume(struct device *dev) msleep(50); gpiod_set_value(ps_bridge->gpio_reset, 0); + /* We just reset things, so we need a delay after the first HPD */ + ps_bridge->need_post_hpd_delay = true; + /* * Mystery 200 ms delay for the "MCU to be ready". It's unclear if * this is truly necessary since the MCU will already signal that diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 40d8ca37f5bc..aa51c61a78c7 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2720,6 +2720,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, * if supported. In any case the default RGB888 format is added */ + /* Default 8bit RGB fallback */ + output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; + if (max_bpc >= 16 && info->bpc == 16) { if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444) output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48; @@ -2753,9 +2756,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444) output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24; - /* Default 8bit RGB fallback */ - output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; - *num_output_fmts = i; return output_fmts; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 3c3561942eb6..05f8756d1aaf 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -931,9 +931,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata) &pdata->bridge.encoder->crtc->state->adjusted_mode; u8 hsync_polarity = 0, vsync_polarity = 0; - if (mode->flags & DRM_MODE_FLAG_PHSYNC) + if (mode->flags & DRM_MODE_FLAG_NHSYNC) hsync_polarity = CHA_HSYNC_POLARITY; - if (mode->flags & DRM_MODE_FLAG_PVSYNC) + if (mode->flags & DRM_MODE_FLAG_NVSYNC) vsync_polarity = CHA_VSYNC_POLARITY; ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG, @@ -1500,8 +1500,8 @@ out: return ret; } -static void ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) +static int ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) { struct ti_sn65dsi86 *pdata = pwm_chip_to_ti_sn_bridge(chip); unsigned int pwm_en_inv; @@ -1512,19 +1512,19 @@ static void ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, ret = regmap_read(pdata->regmap, SN_PWM_EN_INV_REG, &pwm_en_inv); if (ret) - return; + return ret; ret = ti_sn65dsi86_read_u16(pdata, SN_BACKLIGHT_SCALE_REG, &scale); if (ret) - return; + return ret; ret = ti_sn65dsi86_read_u16(pdata, SN_BACKLIGHT_REG, &backlight); if (ret) - return; + return ret; ret = regmap_read(pdata->regmap, SN_PWM_PRE_DIV_REG, &pre_div); if (ret) - return; + return ret; state->enabled = FIELD_GET(SN_PWM_EN_MASK, pwm_en_inv); if (FIELD_GET(SN_PWM_INV_MASK, pwm_en_inv)) @@ -1539,6 +1539,8 @@ static void ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, if (state->duty_cycle > state->period) state->duty_cycle = state->period; + + return 0; } static const struct pwm_ops ti_sn_pwm_ops = { diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c index 3ea53bb67d3b..bd61e20770a5 100644 --- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c @@ -63,23 +63,45 @@ ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter, u8 offset, void *buffer, size_t size) { + u8 zero = 0; + char *tmpbuf = NULL; + /* + * As sub-addressing is not supported by all adaptors, + * always explicitly read from the start and discard + * any bytes that come before the requested offset. + * This way, no matter whether the adaptor supports it + * or not, we'll end up reading the proper data. + */ struct i2c_msg msgs[] = { { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = 0, .len = 1, - .buf = &offset, + .buf = &zero, }, { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = I2C_M_RD, - .len = size, + .len = size + offset, .buf = buffer, }, }; int ret; + if (offset) { + tmpbuf = kmalloc(size + offset, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + msgs[1].buf = tmpbuf; + } + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); + if (tmpbuf) + memcpy(buffer, tmpbuf + offset, size); + + kfree(tmpbuf); + if (ret < 0) return ret; if (ret != ARRAY_SIZE(msgs)) @@ -208,18 +230,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, if (ret) return DRM_DP_DUAL_MODE_UNKNOWN; - /* - * Sigh. Some (maybe all?) type 1 adaptors are broken and ack - * the offset but ignore it, and instead they just always return - * data from the start of the HDMI ID buffer. So for a broken - * type 1 HDMI adaptor a single byte read will always give us - * 0x44, and for a type 1 DVI adaptor it should give 0x00 - * (assuming it implements any registers). Fortunately neither - * of those values will match the type 2 signature of the - * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with - * the type 2 adaptor detection safely even in the presence - * of broken type 1 adaptors. - */ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID, &adaptor_id, sizeof(adaptor_id)); drm_dbg_kms(dev, "DP dual mode adaptor ID: %02x (err %zd)\n", adaptor_id, ret); @@ -233,11 +243,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, return DRM_DP_DUAL_MODE_TYPE2_DVI; } /* - * If neither a proper type 1 ID nor a broken type 1 adaptor - * as described above, assume type 1, but let the user know - * that we may have misdetected the type. + * If not a proper type 1 ID, still assume type 1, but let + * the user know that we may have misdetected the type. */ - if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0]) + if (!is_type1_adaptor(adaptor_id)) drm_err(dev, "Unexpected DP dual mode adaptor ID %02x\n", adaptor_id); } @@ -343,10 +352,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output); * @enable: enable (as opposed to disable) the TMDS output buffers * * Set the state of the TMDS output buffers in the adaptor. For - * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As - * some type 1 adaptors have problems with registers (see comments - * in drm_dp_dual_mode_detect()) we avoid touching the register, - * making this function a no-op on type 1 adaptors. + * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. + * Type1 adaptors do not support any register writes. * * Returns: * 0 on success, negative error code on failure diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 8214a0b1ab7f..73b845a75d52 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/srcu.h> +#include <drm/drm_accel.h> #include <drm/drm_cache.h> #include <drm/drm_client.h> #include <drm/drm_color_mgmt.h> @@ -90,6 +91,8 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; + case DRM_MINOR_ACCEL: + return &dev->accel; default: BUG(); } @@ -104,9 +107,13 @@ static void drm_minor_alloc_release(struct drm_device *dev, void *data) put_device(minor->kdev); - spin_lock_irqsave(&drm_minor_lock, flags); - idr_remove(&drm_minors_idr, minor->index); - spin_unlock_irqrestore(&drm_minor_lock, flags); + if (minor->type == DRM_MINOR_ACCEL) { + accel_minor_remove(minor->index); + } else { + spin_lock_irqsave(&drm_minor_lock, flags); + idr_remove(&drm_minors_idr, minor->index); + spin_unlock_irqrestore(&drm_minor_lock, flags); + } } static int drm_minor_alloc(struct drm_device *dev, unsigned int type) @@ -123,13 +130,17 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type) minor->dev = dev; idr_preload(GFP_KERNEL); - spin_lock_irqsave(&drm_minor_lock, flags); - r = idr_alloc(&drm_minors_idr, - NULL, - 64 * type, - 64 * (type + 1), - GFP_NOWAIT); - spin_unlock_irqrestore(&drm_minor_lock, flags); + if (type == DRM_MINOR_ACCEL) { + r = accel_minor_alloc(); + } else { + spin_lock_irqsave(&drm_minor_lock, flags); + r = idr_alloc(&drm_minors_idr, + NULL, + 64 * type, + 64 * (type + 1), + GFP_NOWAIT); + spin_unlock_irqrestore(&drm_minor_lock, flags); + } idr_preload_end(); if (r < 0) @@ -161,10 +172,14 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type) if (!minor) return 0; - ret = drm_debugfs_init(minor, minor->index, drm_debugfs_root); - if (ret) { - DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); - goto err_debugfs; + if (minor->type == DRM_MINOR_ACCEL) { + accel_debugfs_init(minor, minor->index); + } else { + ret = drm_debugfs_init(minor, minor->index, drm_debugfs_root); + if (ret) { + DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); + goto err_debugfs; + } } ret = device_add(minor->kdev); @@ -172,9 +187,13 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type) goto err_debugfs; /* replace NULL with @minor so lookups will succeed from now on */ - spin_lock_irqsave(&drm_minor_lock, flags); - idr_replace(&drm_minors_idr, minor, minor->index); - spin_unlock_irqrestore(&drm_minor_lock, flags); + if (minor->type == DRM_MINOR_ACCEL) { + accel_minor_replace(minor, minor->index); + } else { + spin_lock_irqsave(&drm_minor_lock, flags); + idr_replace(&drm_minors_idr, minor, minor->index); + spin_unlock_irqrestore(&drm_minor_lock, flags); + } DRM_DEBUG("new minor registered %d\n", minor->index); return 0; @@ -194,9 +213,13 @@ static void drm_minor_unregister(struct drm_device *dev, unsigned int type) return; /* replace @minor with NULL so lookups will fail from now on */ - spin_lock_irqsave(&drm_minor_lock, flags); - idr_replace(&drm_minors_idr, NULL, minor->index); - spin_unlock_irqrestore(&drm_minor_lock, flags); + if (minor->type == DRM_MINOR_ACCEL) { + accel_minor_replace(NULL, minor->index); + } else { + spin_lock_irqsave(&drm_minor_lock, flags); + idr_replace(&drm_minors_idr, NULL, minor->index); + spin_unlock_irqrestore(&drm_minor_lock, flags); + } device_del(minor->kdev); dev_set_drvdata(minor->kdev, NULL); /* safety belt */ @@ -603,6 +626,13 @@ static int drm_dev_init(struct drm_device *dev, /* no per-device feature limits by default */ dev->driver_features = ~0u; + if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL) && + (drm_core_check_feature(dev, DRIVER_RENDER) || + drm_core_check_feature(dev, DRIVER_MODESET))) { + DRM_ERROR("DRM driver can't be both a compute acceleration and graphics driver\n"); + return -EINVAL; + } + drm_legacy_init_members(dev); INIT_LIST_HEAD(&dev->filelist); INIT_LIST_HEAD(&dev->filelist_internal); @@ -615,7 +645,7 @@ static int drm_dev_init(struct drm_device *dev, mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); - ret = drmm_add_action(dev, drm_dev_init_release, NULL); + ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL); if (ret) return ret; @@ -628,15 +658,21 @@ static int drm_dev_init(struct drm_device *dev, dev->anon_inode = inode; - if (drm_core_check_feature(dev, DRIVER_RENDER)) { - ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); + if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) { + ret = drm_minor_alloc(dev, DRM_MINOR_ACCEL); if (ret) goto err; - } + } else { + if (drm_core_check_feature(dev, DRIVER_RENDER)) { + ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); + if (ret) + goto err; + } - ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY); - if (ret) - goto err; + ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY); + if (ret) + goto err; + } ret = drm_legacy_create_map_hash(dev); if (ret) @@ -883,6 +919,10 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) if (ret) goto err_minors; + ret = drm_minor_register(dev, DRM_MINOR_ACCEL); + if (ret) + goto err_minors; + ret = create_compat_control_link(dev); if (ret) goto err_minors; @@ -902,12 +942,13 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) driver->name, driver->major, driver->minor, driver->patchlevel, driver->date, dev->dev ? dev_name(dev->dev) : "virtual device", - dev->primary->index); + dev->primary ? dev->primary->index : dev->accel->index); goto out_unlock; err_minors: remove_compat_control_link(dev); + drm_minor_unregister(dev, DRM_MINOR_ACCEL); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); out_unlock: @@ -950,6 +991,7 @@ void drm_dev_unregister(struct drm_device *dev) drm_legacy_rmmaps(dev); remove_compat_control_link(dev); + drm_minor_unregister(dev, DRM_MINOR_ACCEL); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); } @@ -1034,6 +1076,7 @@ static const struct file_operations drm_stub_fops = { static void drm_core_exit(void) { drm_privacy_screen_lookup_exit(); + accel_core_exit(); unregister_chrdev(DRM_MAJOR, "drm"); debugfs_remove(drm_debugfs_root); drm_sysfs_destroy(); @@ -1061,6 +1104,10 @@ static int __init drm_core_init(void) if (ret < 0) goto error; + ret = accel_core_init(); + if (ret < 0) + goto error; + drm_privacy_screen_lookup_init(); drm_core_init_complete = true; diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index a8b4d918e9a3..64b4a3a87fbb 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -326,7 +326,7 @@ static int drm_cpu_valid(void) * Creates and initializes a drm_file structure for the file private data in \p * filp and add it into the double linked list in \p dev. */ -static int drm_open_helper(struct file *filp, struct drm_minor *minor) +int drm_open_helper(struct file *filp, struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct drm_file *priv; diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 653a5821dd53..74ff33c2ddaa 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -817,6 +817,38 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t return false; } +static const uint32_t conv_from_xrgb8888[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, +}; + +static const uint32_t conv_from_rgb565_888[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +}; + +static bool is_conversion_supported(uint32_t from, uint32_t to) +{ + switch (from) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + return is_listed_fourcc(conv_from_xrgb8888, ARRAY_SIZE(conv_from_xrgb8888), to); + case DRM_FORMAT_RGB565: + case DRM_FORMAT_RGB888: + return is_listed_fourcc(conv_from_rgb565_888, ARRAY_SIZE(conv_from_rgb565_888), to); + case DRM_FORMAT_XRGB2101010: + return to == DRM_FORMAT_ARGB2101010; + case DRM_FORMAT_ARGB2101010: + return to == DRM_FORMAT_XRGB2101010; + default: + return false; + } +} + /** * drm_fb_build_fourcc_list - Filters a list of supported color formats against * the device's native formats @@ -837,7 +869,9 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t * be handed over to drm_universal_plane_init() et al. Native formats * will go before emulated formats. Other heuristics might be applied * to optimize the order. Formats near the beginning of the list are - * usually preferred over formats near the end of the list. + * usually preferred over formats near the end of the list. Formats + * without conversion helpers will be skipped. New drivers should only + * pass in XRGB8888 and avoid exposing additional emulated formats. * * Returns: * The number of color-formats 4CC codes returned in @fourccs_out. @@ -849,7 +883,7 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, { u32 *fourccs = fourccs_out; const u32 *fourccs_end = fourccs_out + nfourccs_out; - bool found_native = false; + uint32_t native_format = 0; size_t i; /* @@ -868,27 +902,19 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, drm_dbg_kms(dev, "adding native format %p4cc\n", &fourcc); - if (!found_native) - found_native = is_listed_fourcc(driver_fourccs, driver_nfourccs, fourcc); + /* + * There should only be one native format with the current API. + * This API needs to be refactored to correctly support arbitrary + * sets of native formats, since it needs to report which native + * format to use for each emulated format. + */ + if (!native_format) + native_format = fourcc; *fourccs = fourcc; ++fourccs; } /* - * The plane's atomic_update helper converts the framebuffer's color format - * to a native format when copying to device memory. - * - * If there is not a single format supported by both, device and - * driver, the native formats are likely not supported by the conversion - * helpers. Therefore *only* support the native formats and add a - * conversion helper ASAP. - */ - if (!found_native) { - drm_warn(dev, "Format conversion helpers required to add extra formats.\n"); - goto out; - } - - /* * The extra formats, emulated by the driver, go second. */ @@ -900,6 +926,9 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, } else if (fourccs == fourccs_end) { drm_warn(dev, "Ignoring emulated format %p4cc\n", &fourcc); continue; /* end of available output buffer */ + } else if (!is_conversion_supported(fourcc, native_format)) { + drm_dbg_kms(dev, "Unsupported emulated format %p4cc\n", &fourcc); + continue; /* format is not supported for conversion */ } drm_dbg_kms(dev, "adding emulated format %p4cc\n", &fourcc); @@ -908,7 +937,6 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, ++fourccs; } -out: return fourccs - fourccs_out; } EXPORT_SYMBOL(drm_fb_build_fourcc_list); diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 35138f8a375c..b602cd72a120 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -571,12 +571,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - int ret; WARN_ON(shmem->base.import_attach); - ret = drm_gem_shmem_get_pages(shmem); - WARN_ON_ONCE(ret != 0); + mutex_lock(&shmem->pages_lock); + + /* + * We should have already pinned the pages when the buffer was first + * mmap'd, vm_open() just grabs an additional reference for the new + * mm the vma is getting copied into (ie. on fork()). + */ + if (!WARN_ON_ONCE(!shmem->pages_use_count)) + shmem->pages_use_count++; + + mutex_unlock(&shmem->pages_lock); drm_gem_vm_open(vma); } @@ -622,10 +630,8 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct } ret = drm_gem_shmem_get_pages(shmem); - if (ret) { - drm_gem_vm_close(vma); + if (ret) return ret; - } vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 7bb98e6a446d..5ea5e260118c 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -104,7 +104,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank) static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank) { - kthread_destroy_worker(vblank->worker); + if (vblank->worker) + kthread_destroy_worker(vblank->worker); } int drm_vblank_worker_init(struct drm_vblank_crtc *vblank); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 8a0c0e0bb5bd..52d8800a8ab8 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -134,6 +134,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Acer Switch V 10 (SW5-017) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Anbernic Win600 */ .matches = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Anbernic"), @@ -319,6 +325,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Nanote UMPC-01 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* OneGX1 Pro */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"), diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 430e00b16eec..183130355997 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -19,6 +19,7 @@ #include <linux/kdev_t.h> #include <linux/slab.h> +#include <drm/drm_accel.h> #include <drm/drm_connector.h> #include <drm/drm_device.h> #include <drm/drm_file.h> @@ -90,7 +91,7 @@ static void drm_sysfs_acpi_register(void) { } static void drm_sysfs_acpi_unregister(void) { } #endif -static char *drm_devnode(struct device *dev, umode_t *mode) +static char *drm_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev)); } @@ -471,19 +472,26 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) struct device *kdev; int r; - if (minor->type == DRM_MINOR_RENDER) - minor_str = "renderD%d"; - else - minor_str = "card%d"; - kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return ERR_PTR(-ENOMEM); device_initialize(kdev); - kdev->devt = MKDEV(DRM_MAJOR, minor->index); - kdev->class = drm_class; - kdev->type = &drm_sysfs_device_minor; + + if (minor->type == DRM_MINOR_ACCEL) { + minor_str = "accel%d"; + accel_set_device_instance_params(kdev, minor->index); + } else { + if (minor->type == DRM_MINOR_RENDER) + minor_str = "renderD%d"; + else + minor_str = "card%d"; + + kdev->devt = MKDEV(DRM_MAJOR, minor->index); + kdev->class = drm_class; + kdev->type = &drm_sysfs_device_minor; + } + kdev->parent = minor->dev->dev; kdev->release = drm_sysfs_release; dev_set_drvdata(kdev, minor); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 68e4446a94ad..c5ae5492e1af 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -643,6 +643,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) struct page **pvec = NULL; struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr; int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT; + unsigned int gup_flags = FOLL_LONGTERM; might_lock_read(¤t->mm->mmap_lock); @@ -653,14 +654,15 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) if (!pvec) return -ENOMEM; + if (!userptr->ro) + gup_flags |= FOLL_WRITE; + do { unsigned num_pages = npages - pinned; uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE; struct page **pages = pvec + pinned; - ret = pin_user_pages_fast(ptr, num_pages, - FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM, - pages); + ret = pin_user_pages_fast(ptr, num_pages, gup_flags, pages); if (ret < 0) { unpin_user_pages(pvec, pinned); kvfree(pvec); diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 471fd6c8135f..e19c2ceb3759 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -477,7 +477,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data *g2d, } ret = pin_user_pages_fast(start, npages, - FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, + FOLL_WRITE | FOLL_LONGTERM, g2d_userptr->pages); if (ret != npages) { DRM_DEV_ERROR(g2d->dev, diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c index 4d4a715b429d..2c2b92324a2e 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c @@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector) return drm_panel_get_modes(fsl_connector->panel, connector); } -static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { if (mode->hdisplay & 0xf) return MODE_ERROR; diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index d444e7fffb54..a14d2896aebb 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -1174,6 +1174,8 @@ static int tda998x_audio_codec_init(struct tda998x_priv *priv, struct hdmi_codec_pdata codec_data = { .ops = &audio_codec_ops, .max_i2s_channels = 2, + .no_i2s_capture = 1, + .no_spdif_capture = 1, }; if (priv->audio_port_enable[AUDIO_ROUTE_I2S]) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 3593938dcd87..24ef36ec2d3d 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -673,8 +673,6 @@ static void intel_enable_dp(struct intel_atomic_state *state, intel_dp_pcon_dsc_configure(intel_dp, pipe_config); intel_dp_start_link_train(intel_dp, pipe_config); intel_dp_stop_link_train(intel_dp, pipe_config); - - intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void g4x_enable_dp(struct intel_atomic_state *state, @@ -683,6 +681,7 @@ static void g4x_enable_dp(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { intel_enable_dp(state, encoder, pipe_config, conn_state); + intel_audio_codec_enable(encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); } @@ -691,6 +690,7 @@ static void vlv_enable_dp(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + intel_audio_codec_enable(encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); } diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 121caeaa409b..c3580d96765c 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -157,10 +157,8 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, &pipe_config->infoframes.hdmi); } -static void g4x_enable_hdmi(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static void g4x_hdmi_enable_port(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -175,6 +173,16 @@ static void g4x_enable_hdmi(struct intel_atomic_state *state, intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp); intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg); +} + +static void g4x_enable_hdmi(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + g4x_hdmi_enable_port(encoder, pipe_config); drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && !pipe_config->has_hdmi_sink); @@ -294,6 +302,11 @@ static void vlv_enable_hdmi(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && + !pipe_config->has_hdmi_sink); + intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void intel_disable_hdmi(struct intel_atomic_state *state, @@ -415,7 +428,7 @@ static void vlv_hdmi_pre_enable(struct intel_atomic_state *state, pipe_config->has_infoframe, pipe_config, conn_state); - g4x_enable_hdmi(state, encoder, pipe_config, conn_state); + g4x_hdmi_enable_port(encoder, pipe_config); vlv_wait_port_ready(dev_priv, dig_port, 0x0); } @@ -492,7 +505,7 @@ static void chv_hdmi_pre_enable(struct intel_atomic_state *state, pipe_config->has_infoframe, pipe_config, conn_state); - g4x_enable_hdmi(state, encoder, pipe_config, conn_state); + g4x_hdmi_enable_port(encoder, pipe_config); vlv_wait_port_ready(dev_priv, dig_port, 0x0); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index c2987f2c2b2e..572a4e3769f3 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -414,7 +414,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, ptrs->lvds_entries++; if (size != 0 || ptrs->lvds_entries != 3) { - kfree(ptrs); + kfree(ptrs_block); return NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b3e23708d194..6c2686ecb62a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3733,12 +3733,16 @@ out: static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 081a4d0083b1..eff3add70611 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -52,8 +52,8 @@ #define DISPLAY_VER12_DMC_MAX_FW_SIZE ICL_DMC_MAX_FW_SIZE -#define DG2_DMC_PATH DMC_PATH(dg2, 2, 07) -#define DG2_DMC_VERSION_REQUIRED DMC_VERSION(2, 07) +#define DG2_DMC_PATH DMC_PATH(dg2, 2, 08) +#define DG2_DMC_VERSION_REQUIRED DMC_VERSION(2, 8) MODULE_FIRMWARE(DG2_DMC_PATH); #define ADLP_DMC_PATH DMC_PATH(adlp, 2, 16) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 67089711d9e2..75070eb07d4b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3679,61 +3679,6 @@ static void intel_dp_phy_pattern_update(struct intel_dp *intel_dp, } } -static void -intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); - enum pipe pipe = crtc->pipe; - u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value; - - trans_ddi_func_ctl_value = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(pipe)); - trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); - dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); - - trans_ddi_func_ctl_value &= ~(TRANS_DDI_FUNC_ENABLE | - TGL_TRANS_DDI_PORT_MASK); - trans_conf_value &= ~PIPECONF_ENABLE; - dp_tp_ctl_value &= ~DP_TP_CTL_ENABLE; - - intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value); - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), - trans_ddi_func_ctl_value); - intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value); -} - -static void -intel_dp_autotest_phy_ddi_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = dig_port->base.port; - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); - enum pipe pipe = crtc->pipe; - u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value; - - trans_ddi_func_ctl_value = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(pipe)); - trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); - dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); - - trans_ddi_func_ctl_value |= TRANS_DDI_FUNC_ENABLE | - TGL_TRANS_DDI_SELECT_PORT(port); - trans_conf_value |= PIPECONF_ENABLE; - dp_tp_ctl_value |= DP_TP_CTL_ENABLE; - - intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value); - intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value); - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), - trans_ddi_func_ctl_value); -} - static void intel_dp_process_phy_request(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -3752,14 +3697,10 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX, link_status); - intel_dp_autotest_phy_ddi_disable(intel_dp, crtc_state); - intel_dp_set_signal_levels(intel_dp, crtc_state, DP_PHY_DPRX); intel_dp_phy_pattern_update(intel_dp, crtc_state); - intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state); - drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET, intel_dp->train_set, crtc_state->lane_count); diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 75e8cc4337c9..2cbc1292ab38 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -41,9 +41,11 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_dsi_vbt.h" +#include "intel_gmbus_regs.h" #include "vlv_dsi.h" #include "vlv_dsi_regs.h" #include "vlv_sideband.h" @@ -137,9 +139,9 @@ static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi, return ffs(intel_dsi->ports) - 1; if (seq_port) { - if (intel_dsi->ports & PORT_B) + if (intel_dsi->ports & BIT(PORT_B)) return PORT_B; - else if (intel_dsi->ports & PORT_C) + else if (intel_dsi->ports & BIT(PORT_C)) return PORT_C; } @@ -377,6 +379,85 @@ static void icl_exec_gpio(struct intel_connector *connector, drm_dbg_kms(&dev_priv->drm, "Skipping ICL GPIO element execution\n"); } +enum { + MIPI_RESET_1 = 0, + MIPI_AVDD_EN_1, + MIPI_BKLT_EN_1, + MIPI_AVEE_EN_1, + MIPI_VIO_EN_1, + MIPI_RESET_2, + MIPI_AVDD_EN_2, + MIPI_BKLT_EN_2, + MIPI_AVEE_EN_2, + MIPI_VIO_EN_2, +}; + +static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv, + int gpio, bool value) +{ + int index; + + if (drm_WARN_ON(&dev_priv->drm, DISPLAY_VER(dev_priv) == 11 && gpio >= MIPI_RESET_2)) + return; + + switch (gpio) { + case MIPI_RESET_1: + case MIPI_RESET_2: + index = gpio == MIPI_RESET_1 ? HPD_PORT_A : HPD_PORT_B; + + /* + * Disable HPD to set the pin to output, and set output + * value. The HPD pin should not be enabled for DSI anyway, + * assuming the board design and VBT are sane, and the pin isn't + * used by a non-DSI encoder. + * + * The locking protects against concurrent SHOTPLUG_CTL_DDI + * modifications in irq setup and handling. + */ + spin_lock_irq(&dev_priv->irq_lock); + intel_de_rmw(dev_priv, SHOTPLUG_CTL_DDI, + SHOTPLUG_CTL_DDI_HPD_ENABLE(index) | + SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index), + value ? SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index) : 0); + spin_unlock_irq(&dev_priv->irq_lock); + break; + case MIPI_AVDD_EN_1: + case MIPI_AVDD_EN_2: + index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + + intel_de_rmw(dev_priv, PP_CONTROL(index), PANEL_POWER_ON, + value ? PANEL_POWER_ON : 0); + break; + case MIPI_BKLT_EN_1: + case MIPI_BKLT_EN_2: + index = gpio == MIPI_BKLT_EN_1 ? 0 : 1; + + intel_de_rmw(dev_priv, PP_CONTROL(index), EDP_BLC_ENABLE, + value ? EDP_BLC_ENABLE : 0); + break; + case MIPI_AVEE_EN_1: + case MIPI_AVEE_EN_2: + index = gpio == MIPI_AVEE_EN_1 ? 1 : 2; + + intel_de_rmw(dev_priv, GPIO(dev_priv, index), + GPIO_CLOCK_VAL_OUT, + GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_DIR_OUT | + GPIO_CLOCK_VAL_MASK | (value ? GPIO_CLOCK_VAL_OUT : 0)); + break; + case MIPI_VIO_EN_1: + case MIPI_VIO_EN_2: + index = gpio == MIPI_VIO_EN_1 ? 1 : 2; + + intel_de_rmw(dev_priv, GPIO(dev_priv, index), + GPIO_DATA_VAL_OUT, + GPIO_DATA_DIR_MASK | GPIO_DATA_DIR_OUT | + GPIO_DATA_VAL_MASK | (value ? GPIO_DATA_VAL_OUT : 0)); + break; + default: + MISSING_CASE(gpio); + } +} + static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) { struct drm_device *dev = intel_dsi->base.base.dev; @@ -384,8 +465,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) struct intel_connector *connector = intel_dsi->attached_connector; u8 gpio_source, gpio_index = 0, gpio_number; bool value; - - drm_dbg_kms(&dev_priv->drm, "\n"); + bool native = DISPLAY_VER(dev_priv) >= 11; if (connector->panel.vbt.dsi.seq_version >= 3) gpio_index = *data++; @@ -398,10 +478,18 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) else gpio_source = 0; + if (connector->panel.vbt.dsi.seq_version >= 4 && *data & BIT(1)) + native = false; + /* pull up/down */ value = *data++ & 1; - if (DISPLAY_VER(dev_priv) >= 11) + drm_dbg_kms(&dev_priv->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n", + gpio_index, gpio_number, gpio_source, str_yes_no(native), str_on_off(value)); + + if (native) + icl_native_gpio_set_value(dev_priv, gpio_number, value); + else if (DISPLAY_VER(dev_priv) >= 11) icl_exec_gpio(connector, gpio_source, gpio_index, value); else if (IS_VALLEYVIEW(dev_priv)) vlv_exec_gpio(connector, gpio_source, gpio_number, value); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 29e9e8d5b6fe..f266b68cf012 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -730,32 +730,69 @@ static int eb_reserve(struct i915_execbuffer *eb) bool unpinned; /* - * Attempt to pin all of the buffers into the GTT. - * This is done in 2 phases: + * We have one more buffers that we couldn't bind, which could be due to + * various reasons. To resolve this we have 4 passes, with every next + * level turning the screws tighter: * - * 1. Unbind all objects that do not match the GTT constraints for - * the execbuffer (fenceable, mappable, alignment etc). - * 2. Bind new objects. + * 0. Unbind all objects that do not match the GTT constraints for the + * execbuffer (fenceable, mappable, alignment etc). Bind all new + * objects. This avoids unnecessary unbinding of later objects in order + * to make room for the earlier objects *unless* we need to defragment. * - * This avoid unnecessary unbinding of later objects in order to make - * room for the earlier objects *unless* we need to defragment. + * 1. Reorder the buffers, where objects with the most restrictive + * placement requirements go first (ignoring fixed location buffers for + * now). For example, objects needing the mappable aperture (the first + * 256M of GTT), should go first vs objects that can be placed just + * about anywhere. Repeat the previous pass. * - * Defragmenting is skipped if all objects are pinned at a fixed location. + * 2. Consider buffers that are pinned at a fixed location. Also try to + * evict the entire VM this time, leaving only objects that we were + * unable to lock. Try again to bind the buffers. (still using the new + * buffer order). + * + * 3. We likely have object lock contention for one or more stubborn + * objects in the VM, for which we need to evict to make forward + * progress (perhaps we are fighting the shrinker?). When evicting the + * VM this time around, anything that we can't lock we now track using + * the busy_bo, using the full lock (after dropping the vm->mutex to + * prevent deadlocks), instead of trylock. We then continue to evict the + * VM, this time with the stubborn object locked, which we can now + * hopefully unbind (if still bound in the VM). Repeat until the VM is + * evicted. Finally we should be able bind everything. */ - for (pass = 0; pass <= 2; pass++) { + for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK; if (pass >= 1) - unpinned = eb_unbind(eb, pass == 2); + unpinned = eb_unbind(eb, pass >= 2); if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) { - err = i915_gem_evict_vm(eb->context->vm, &eb->ww); + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); + mutex_unlock(&eb->context->vm->mutex); + } + if (err) + return err; + } + + if (pass == 3) { +retry: + err = mutex_lock_interruptible(&eb->context->vm->mutex); + if (!err) { + struct drm_i915_gem_object *busy_bo = NULL; + + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); mutex_unlock(&eb->context->vm->mutex); + if (err && busy_bo) { + err = i915_gem_object_lock(busy_bo, &eb->ww); + i915_gem_object_put(busy_bo); + if (!err) + goto retry; + } } if (err) return err; @@ -2427,7 +2464,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) file_priv->bsd_engine = - prandom_u32_max(num_vcs_engines(dev_priv)); + get_random_u32_below(num_vcs_engines(dev_priv)); return file_priv->bsd_engine; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c29efdef8313..0ad44f3868de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ retry: if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) { - ret = i915_gem_evict_vm(&ggtt->vm, &ww); + ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 733696057761..1a0886b8aaa1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -785,6 +785,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) return false; + if (obj->flags & I915_BO_ALLOC_CCS_AUX) + return true; + for (i = 0; i < obj->mm.n_placements; i++) { /* Compression is not allowed for the objects with smem placement */ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d0d6772e6f36..ab4c2f90a564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -327,16 +327,18 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +#define I915_BO_ALLOC_CCS_AUX BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ - I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) + I915_BO_ALLOC_GPU_ONLY | \ + I915_BO_ALLOC_CCS_AUX) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 07e49f22f2de..7e67742bc65e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -50,6 +50,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, container_of(bo->bdev, typeof(*i915), bdev); struct drm_i915_gem_object *backup; struct ttm_operation_ctx ctx = {}; + unsigned int flags; int err = 0; if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) @@ -65,7 +66,22 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) return 0; - backup = i915_gem_object_create_shmem(i915, obj->base.size); + /* + * It seems that we might have some framebuffers still pinned at this + * stage, but for such objects we might also need to deal with the CCS + * aux state. Make sure we force the save/restore of the CCS state, + * otherwise we might observe display corruption, when returning from + * suspend. + */ + flags = 0; + if (i915_gem_object_needs_ccs_pages(obj)) { + WARN_ON_ONCE(!i915_gem_object_is_framebuffer(obj)); + WARN_ON_ONCE(!pm_apply->allow_gpu); + + flags = I915_BO_ALLOC_CCS_AUX; + } + backup = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + obj->base.size, 0, flags); if (IS_ERR(backup)) return PTR_ERR(backup); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 49a8f10d76c7..2daffa7c7dfd 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3690,7 +3690,7 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) * NB This does not force us to execute on this engine, it will just * typically be the first we inspect for submission. */ - swp = prandom_u32_max(ve->num_siblings); + swp = get_random_u32_below(ve->num_siblings); if (swp) swap(ve->siblings[swp], ve->siblings[0]); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 0325f071046c..9c18b5f2e789 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -677,8 +677,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) @@ -1035,9 +1040,9 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb) { if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0, - TLB_INVAL_TIMEOUT_US, - TLB_INVAL_TIMEOUT_MS); + return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); else return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0, TLB_INVAL_TIMEOUT_US, @@ -1104,14 +1109,25 @@ static void mmio_invalidate_full(struct intel_gt *gt) continue; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + u32 val = BIT(engine->instance); + + if (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS) + val = _MASKED_BIT_ENABLE(val); intel_gt_mcr_multicast_write_fw(gt, xehp_regs[engine->class], - BIT(engine->instance)); + val); } else { rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); } awake |= engine->mask; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 830edffe88cc..ea86c1ab5dc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -702,7 +702,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, } /** - * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state + * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state * @gt: GT structure * @reg: the register to read * @mask: mask to apply to register value @@ -730,17 +730,19 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, * * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. */ -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms) +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms) { - u32 reg_value = 0; -#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value) int ret; + lockdep_assert_not_held(>->uncore->lock); + +#define done ((intel_gt_mcr_read_any(gt, reg) & mask) == value) + /* Catch any overuse of this function */ might_sleep_if(slow_timeout_ms); GEM_BUG_ON(fast_timeout_us > 20000); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 3fb0502bff22..ae93b20e1c17 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -37,12 +37,12 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance); -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms); +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms); /* * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index b405a04135ca..5fb74e71f27b 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -342,6 +342,16 @@ static int emit_no_arbitration(struct i915_request *rq) return 0; } +static int max_pte_pkt_size(struct i915_request *rq, int pkt) +{ + struct intel_ring *ring = rq->ring; + + pkt = min_t(int, pkt, (ring->space - rq->reserved_space) / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + return pkt; +} + static int emit_pte(struct i915_request *rq, struct sgt_dma *it, enum i915_cache_level cache_level, @@ -388,8 +398,7 @@ static int emit_pte(struct i915_request *rq, return PTR_ERR(cs); /* Pack as many PTE updates as possible into a single MI command */ - pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_length); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ @@ -422,8 +431,7 @@ static int emit_pte(struct i915_request *rq, } } - pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_rem); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); @@ -829,14 +837,35 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - /* - * While we can't always restore/manage the CCS state, - * we still need to ensure we don't leak the CCS state - * from the previous user, so make sure we overwrite it - * with something. - */ - err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS, - dst_offset, DIRECT_ACCESS, len); + if (src_is_lmem) { + /* + * If the src is already in lmem, then we must + * be doing an lmem -> lmem transfer, and so + * should be safe to directly copy the CCS + * state. In this case we have either + * initialised the CCS aux state when first + * clearing the pages (since it is already + * allocated in lmem), or the user has + * potentially populated it, in which case we + * need to copy the CCS state as-is. + */ + err = emit_copy_ccs(rq, + dst_offset, INDIRECT_ACCESS, + src_offset, INDIRECT_ACCESS, + len); + } else { + /* + * While we can't always restore/manage the CCS + * state, we still need to ensure we don't leak + * the CCS state from the previous user, so make + * sure we overwrite it with something. + */ + err = emit_copy_ccs(rq, + dst_offset, INDIRECT_ACCESS, + dst_offset, DIRECT_ACCESS, + len); + } + if (err) goto out_rq; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1bd8d63ad4f3..2afb4f80a954 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3011,7 +3011,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) + if (GRAPHICS_VER(engine->i915) < 4) return; engine_fake_wa_init(engine, wal); @@ -3036,9 +3036,6 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (GRAPHICS_VER(engine->i915) < 4) - return; - wa_init_start(wal, engine->gt, "engine", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 0dcb3ed44a73..87c94314cf67 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -317,7 +317,7 @@ static int live_engine_busy_stats(void *arg) ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - mdelay(10); + mdelay(100); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 1d49a7ec0bd8..1c1b85073b4b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -170,7 +170,7 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = { } /* List of lists */ -static struct __guc_mmio_reg_descr_group default_lists[] = { +static const struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index be855811d85d..410905da8e97 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -211,6 +211,30 @@ void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *b huc->delayed_load.nb.notifier_call = NULL; } +static void delayed_huc_load_init(struct intel_huc *huc) +{ + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a delayed HuC load in progress. + */ + i915_sw_fence_init(&huc->delayed_load.fence, + sw_fence_dummy_notify); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + huc->delayed_load.timer.function = huc_delayed_load_timer_callback; +} + +static void delayed_huc_load_fini(struct intel_huc *huc) +{ + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ + delayed_huc_load_complete(huc); + i915_sw_fence_fini(&huc->delayed_load.fence); +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; @@ -241,6 +265,15 @@ void intel_huc_init_early(struct intel_huc *huc) intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); + /* + * we always init the fence as already completed, even if HuC is not + * supported. This way we don't have to distinguish between HuC not + * supported/disabled or already loaded, and can focus on if the load + * is currently in progress (fence not complete) or not, which is what + * we care about for stalling userspace submissions. + */ + delayed_huc_load_init(huc); + if (!vcs_supported(gt)) { intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); return; @@ -255,17 +288,6 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status.mask = HUC_FW_VERIFIED; huc->status.value = HUC_FW_VERIFIED; } - - /* - * Initialize fence to be complete as this is expected to be complete - * unless there is a delayed HuC reload in progress. - */ - i915_sw_fence_init(&huc->delayed_load.fence, - sw_fence_dummy_notify); - i915_sw_fence_commit(&huc->delayed_load.fence); - - hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - huc->delayed_load.timer.function = huc_delayed_load_timer_callback; } #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") @@ -329,13 +351,14 @@ out: void intel_huc_fini(struct intel_huc *huc) { - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - - delayed_huc_load_complete(huc); + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ + delayed_huc_load_fini(huc); - i915_sw_fence_fini(&huc->delayed_load.fence); - intel_uc_fw_fini(&huc->fw); + if (intel_uc_fw_is_loadable(&huc->fw)) + intel_uc_fw_fini(&huc->fw); } void intel_huc_suspend(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 1d28286e6f06..2a508b137e90 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -722,6 +722,7 @@ int intel_uc_runtime_resume(struct intel_uc *uc) static const struct intel_uc_ops uc_ops_off = { .init_hw = __uc_check_hw, + .fini = __uc_fini, /* to clean-up the init_early initialization */ }; static const struct intel_uc_ops uc_ops_on = { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 0c80ba51a4bd..2bcdd192f814 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -545,6 +545,32 @@ static int check_ccs_header(struct intel_gt *gt, return 0; } +static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **fw) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct device *dev = gt->i915->drm.dev; + int err; + + err = firmware_request_nowarn(fw, uc_fw->file_selected.path, dev); + + if (err) + return err; + + if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) { + drm_err(>->i915->drm, + "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); + + /* try to find another blob to load */ + release_firmware(*fw); + *fw = NULL; + return -ENOENT; + } + + return 0; +} + /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware @@ -558,7 +584,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) struct intel_gt *gt = __uc_fw_to_gt(uc_fw); struct drm_i915_private *i915 = gt->i915; struct intel_uc_fw_file file_ideal; - struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; bool old_ver = false; @@ -574,20 +599,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + err = try_firmware_load(uc_fw, &fw); memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal)); - if (!err && fw->size > INTEL_UC_RSVD_GGTT_PER_FW) { - drm_err(&i915->drm, - "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - fw->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); - - /* try to find another blob to load */ - release_firmware(fw); - err = -ENOENT; - } - /* Any error is terminal if overriding. Don't bother searching for older versions */ if (err && intel_uc_fw_is_overridden(uc_fw)) goto fail; @@ -608,7 +622,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) break; } - err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + err = try_firmware_load(uc_fw, &fw); } if (err) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 897b6fdbbaed..f5451adcd489 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,18 +664,14 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; vgpu->attached = true; - kvmgt_protect_table_init(vgpu); - gvt_cache_init(vgpu); - vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); @@ -711,15 +707,19 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); + WARN_ON(vgpu->nr_cache_entries); + + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; + intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) @@ -1447,9 +1447,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); struct intel_vgpu_type *type = container_of(mdev->type, struct intel_vgpu_type, type); + int ret; vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt; - return intel_gvt_create_vgpu(vgpu, type->conf); + ret = intel_gvt_create_vgpu(vgpu, type->conf); + if (ret) + return ret; + + kvmgt_protect_table_init(vgpu); + gvt_cache_init(vgpu); + + return 0; } static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) @@ -1457,7 +1465,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); intel_gvt_destroy_vgpu(vgpu); - vfio_free_device(vfio_dev); } static const struct vfio_device_ops intel_vgpu_dev_ops = { @@ -1470,6 +1477,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, .dma_unmap = intel_vgpu_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f025ee4fa526..a4b4d9b7d26c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * @vm: Address space to cleanse * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm * will be able to evict vma's locked by the ww as well. + * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then + * in the event i915_gem_evict_vm() is unable to trylock an object for eviction, + * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop + * the vm->mutex, before trying again to acquire the contended lock. The caller + * also owns a reference to the object. * * This function evicts all vmas from a vm. * @@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo) { int ret = 0; @@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */ - if (dying_vma(vma) || + if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; } - if (!i915_gem_object_trylock(vma->obj, ww)) + if (!i915_gem_object_trylock(vma->obj, ww)) { + if (busy_bo) { + *busy_bo = vma->obj; /* holds ref */ + ret = -EBUSY; + break; + } + i915_gem_object_put(vma->obj); continue; + } __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); @@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break; - ret = 0; /* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } + if (!dying_vma(vma)) + i915_gem_object_put(vma->obj); } list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } i915_gem_object_unlock(vma->obj); + i915_gem_object_put(vma->obj); } } while (ret == 0); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object; int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, - struct i915_gem_ww_ctx *ww); + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo); #endif /* __I915_GEM_EVICT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index edfe363af838..91c533986041 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1974,7 +1974,10 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) if (ddi_hotplug_trigger) { u32 dig_hotplug_reg; + /* Locking due to DSI native GPIO sequences */ + spin_lock(&dev_priv->irq_lock); dig_hotplug_reg = intel_uncore_rmw(&dev_priv->uncore, SHOTPLUG_CTL_DDI, 0, 0); + spin_unlock(&dev_priv->irq_lock); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, ddi_hotplug_trigger, dig_hotplug_reg, diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6da9784fe4a2..ccd1f864aa19 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1129,7 +1129,6 @@ static const struct intel_gt_definition xelpmp_extra_gt[] = { {} }; -__maybe_unused static const struct intel_device_info mtl_info = { XE_HP_FEATURES, XE_LPDP_FEATURES, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 00e09bb18b13..125b6ca25a75 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1383,6 +1383,9 @@ static u32 oa_context_image_offset(struct intel_context *ce, u32 reg) u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; u32 *state = ce->lrc_reg_state; + if (drm_WARN_ON(&ce->engine->i915->drm, !state)) + return U32_MAX; + for (offset = 0; offset < len; ) { if (IS_MI_LRI_CMD(state[offset])) { /* @@ -1447,7 +1450,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) if (IS_ERR(ce)) return PTR_ERR(ce); - if (engine_supports_mi_query(stream->engine)) { + if (engine_supports_mi_query(stream->engine) && + HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) { /* * We are enabling perf query here. If we don't find the context * offset here, just return an error. diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8e1892d14774..916176872544 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5988,6 +5988,7 @@ #define SHOTPLUG_CTL_DDI _MMIO(0xc4030) #define SHOTPLUG_CTL_DDI_HPD_ENABLE(hpd_pin) (0x8 << (_HPD_PIN_DDI(hpd_pin) * 4)) +#define SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(hpd_pin) (0x4 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(hpd_pin) (0x3 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_NO_DETECT(hpd_pin) (0x0 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(hpd_pin) (0x1 << (_HPD_PIN_DDI(hpd_pin) * 4)) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index cc2a8821d22a..8a9aad523eec 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -465,7 +465,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) struct i915_sw_dma_fence_cb_timer *cb = container_of(wrk, typeof(*cb), work); - del_timer_sync(&cb->timer); + timer_shutdown_sync(&cb->timer); dma_fence_put(cb->dma); kfree_rcu(cb, rcu); diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c index c822d0aafd2d..e3f808372c47 100644 --- a/drivers/gpu/drm/i915/i915_user_extensions.c +++ b/drivers/gpu/drm/i915/i915_user_extensions.c @@ -51,7 +51,7 @@ int i915_user_extensions(struct i915_user_extension __user *ext, return err; if (get_user(next, &ext->next_extension) || - overflows_type(next, ext)) + overflows_type(next, uintptr_t)) return -EFAULT; ext = u64_to_user_ptr(next); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 6c14d13364bf..67a66d4d5c70 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -111,10 +111,6 @@ bool i915_error_injected(void); #define range_overflows_end_t(type, start, size, max) \ range_overflows_end((type)(start), (type)(size), (type)(max)) -/* Note we don't consider signbits :| */ -#define overflows_type(x, T) \ - (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) - #define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ (typeof(ptr))(__v & -BIT(n)); \ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 703fee6b5f75..3a33be5401ed 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1566,7 +1566,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. */ - i915_gem_evict_vm(vm, NULL); + i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 9a4a7fb55582..b9a164efd6ae 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -38,7 +38,7 @@ static int __iopagetest(struct intel_memory_region *mem, u8 value, resource_size_t offset, const void *caller) { - int byte = prandom_u32_max(pagesize); + int byte = get_random_u32_below(pagesize); u8 result[3]; memset_io(va, value, pagesize); /* or GPF! */ @@ -92,7 +92,7 @@ static int iopagetest(struct intel_memory_region *mem, static resource_size_t random_page(resource_size_t last) { /* Limited to low 44b (16TiB), but should suffice for a spot check */ - return prandom_u32_max(last >> PAGE_SHIFT) << PAGE_SHIFT; + return get_random_u32_below(last >> PAGE_SHIFT) << PAGE_SHIFT; } static int iomemtest(struct intel_memory_region *mem, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8006a6c61466..614013745fca 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -824,9 +824,9 @@ void intel_uncore_forcewake_flush(struct intel_uncore *uncore, } /** - * intel_uncore_forcewake_put__locked - grab forcewake domain references + * intel_uncore_forcewake_put__locked - release forcewake domain references * @uncore: the intel_uncore structure - * @fw_domains: forcewake domains to get reference on + * @fw_domains: forcewake domains to put references * * See intel_uncore_forcewake_put(). This variant places the onus * on the caller to explicitly handle the dev_priv->uncore.lock spinlock. diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 5449146a0624..e9e38490815d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -382,20 +382,6 @@ __uncore_write(write_notrace, 32, l, false) */ __uncore_read(read64, 64, q, true) -static inline u64 -intel_uncore_read64_2x32(struct intel_uncore *uncore, - i915_reg_t lower_reg, i915_reg_t upper_reg) -{ - u32 upper, lower, old_upper, loop = 0; - upper = intel_uncore_read(uncore, upper_reg); - do { - old_upper = upper; - lower = intel_uncore_read(uncore, lower_reg); - upper = intel_uncore_read(uncore, upper_reg); - } while (upper != old_upper && loop++ < 2); - return (u64)upper << 32 | lower; -} - #define intel_uncore_posting_read(...) ((void)intel_uncore_read_notrace(__VA_ARGS__)) #define intel_uncore_posting_read16(...) ((void)intel_uncore_read16_notrace(__VA_ARGS__)) @@ -455,6 +441,36 @@ static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, intel_uncore_write_fw(uncore, reg, val); } +static inline u64 +intel_uncore_read64_2x32(struct intel_uncore *uncore, + i915_reg_t lower_reg, i915_reg_t upper_reg) +{ + u32 upper, lower, old_upper, loop = 0; + enum forcewake_domains fw_domains; + unsigned long flags; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, lower_reg, + FW_REG_READ); + + fw_domains |= intel_uncore_forcewake_for_reg(uncore, upper_reg, + FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + upper = intel_uncore_read_fw(uncore, upper_reg); + do { + old_upper = upper; + lower = intel_uncore_read_fw(uncore, lower_reg); + upper = intel_uncore_read_fw(uncore, upper_reg); + } while (upper != old_upper && loop++ < 2); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return (u64)upper << 32 | lower; +} + static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, i915_reg_t reg, u32 val, u32 mask, u32 expected_val) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg) /* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, NULL); + err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", @@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg) for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, &ww); + err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig index 975de4ff7313..fd5b2471fdf0 100644 --- a/drivers/gpu/drm/imx/Kconfig +++ b/drivers/gpu/drm/imx/Kconfig @@ -4,7 +4,6 @@ config DRM_IMX select DRM_KMS_HELPER select VIDEOMODE_HELPERS select DRM_GEM_DMA_HELPER - select DRM_KMS_HELPER depends on DRM && (ARCH_MXC || ARCH_MULTIPLATFORM || COMPILE_TEST) depends on IMX_IPUV3_CORE help diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index d64ebd2cf15e..d6832f506322 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -217,8 +217,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector) return ret; } -static int imx_tve_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +imx_tve_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { struct imx_tve *tve = con_to_tve(connector); unsigned long rate; diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c index 011be7ff51e1..bc8fb4e38d0a 100644 --- a/drivers/gpu/drm/lima/lima_devfreq.c +++ b/drivers/gpu/drm/lima/lima_devfreq.c @@ -112,11 +112,6 @@ int lima_devfreq_init(struct lima_device *ldev) unsigned long cur_freq; int ret; const char *regulator_names[] = { "mali", NULL }; - const char *clk_names[] = { "core", NULL }; - struct dev_pm_opp_config config = { - .regulator_names = regulator_names, - .clk_names = clk_names, - }; if (!device_property_present(dev, "operating-points-v2")) /* Optional, continue without devfreq */ @@ -124,7 +119,15 @@ int lima_devfreq_init(struct lima_device *ldev) spin_lock_init(&ldevfreq->lock); - ret = devm_pm_opp_set_config(dev, &config); + /* + * clkname is set separately so it is not affected by the optional + * regulator setting which may return error. + */ + ret = devm_pm_opp_set_clkname(dev, "core"); + if (ret) + return ret; + + ret = devm_pm_opp_set_regulators(dev, regulator_names); if (ret) { /* Continue if the optional regulator is missing */ if (ret != -ENODEV) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 002b0f6cae1a..84daeaffab6a 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -29,17 +29,22 @@ #define DISP_REG_OVL_DATAPATH_CON 0x0024 #define OVL_LAYER_SMI_ID_EN BIT(0) #define OVL_BGCLR_SEL_IN BIT(2) +#define OVL_LAYER_AFBC_EN(n) BIT(4+n) #define DISP_REG_OVL_ROI_BGCLR 0x0028 #define DISP_REG_OVL_SRC_CON 0x002c #define DISP_REG_OVL_CON(n) (0x0030 + 0x20 * (n)) #define DISP_REG_OVL_SRC_SIZE(n) (0x0038 + 0x20 * (n)) #define DISP_REG_OVL_OFFSET(n) (0x003c + 0x20 * (n)) +#define DISP_REG_OVL_PITCH_MSB(n) (0x0040 + 0x20 * (n)) +#define OVL_PITCH_MSB_2ND_SUBBUF BIT(16) #define DISP_REG_OVL_PITCH(n) (0x0044 + 0x20 * (n)) #define DISP_REG_OVL_RDMA_CTRL(n) (0x00c0 + 0x20 * (n)) #define DISP_REG_OVL_RDMA_GMC(n) (0x00c8 + 0x20 * (n)) #define DISP_REG_OVL_ADDR_MT2701 0x0040 #define DISP_REG_OVL_ADDR_MT8173 0x0f40 #define DISP_REG_OVL_ADDR(ovl, n) ((ovl)->data->addr + 0x20 * (n)) +#define DISP_REG_OVL_HDR_ADDR(ovl, n) ((ovl)->data->addr + 0x20 * (n) + 0x04) +#define DISP_REG_OVL_HDR_PITCH(ovl, n) ((ovl)->data->addr + 0x20 * (n) + 0x08) #define GMC_THRESHOLD_BITS 16 #define GMC_THRESHOLD_HIGH ((1 << GMC_THRESHOLD_BITS) / 4) @@ -67,6 +72,7 @@ struct mtk_disp_ovl_data { unsigned int layer_nr; bool fmt_rgb565_is_0; bool smi_id_en; + bool supports_afbc; }; /* @@ -172,7 +178,14 @@ void mtk_ovl_stop(struct device *dev) reg = reg & ~OVL_LAYER_SMI_ID_EN; writel_relaxed(reg, ovl->regs + DISP_REG_OVL_DATAPATH_CON); } +} +static void mtk_ovl_set_afbc(struct mtk_disp_ovl *ovl, struct cmdq_pkt *cmdq_pkt, + int idx, bool enabled) +{ + mtk_ddp_write_mask(cmdq_pkt, enabled ? OVL_LAYER_AFBC_EN(idx) : 0, + &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_DATAPATH_CON, OVL_LAYER_AFBC_EN(idx)); } void mtk_ovl_config(struct device *dev, unsigned int w, @@ -310,11 +323,23 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); struct mtk_plane_pending_state *pending = &state->pending; unsigned int addr = pending->addr; - unsigned int pitch = pending->pitch & 0xffff; + unsigned int hdr_addr = pending->hdr_addr; + unsigned int pitch = pending->pitch; + unsigned int hdr_pitch = pending->hdr_pitch; unsigned int fmt = pending->format; unsigned int offset = (pending->y << 16) | pending->x; unsigned int src_size = (pending->height << 16) | pending->width; unsigned int con; + bool is_afbc = pending->modifier != DRM_FORMAT_MOD_LINEAR; + union overlay_pitch { + struct split_pitch { + u16 lsb; + u16 msb; + } split_pitch; + u32 pitch; + } overlay_pitch; + + overlay_pitch.pitch = pitch; if (!pending->enable) { mtk_ovl_layer_off(dev, idx, cmdq_pkt); @@ -335,9 +360,12 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, addr += pending->pitch - 1; } + if (ovl->data->supports_afbc) + mtk_ovl_set_afbc(ovl, cmdq_pkt, idx, is_afbc); + mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_CON(idx)); - mtk_ddp_write_relaxed(cmdq_pkt, pitch, &ovl->cmdq_reg, ovl->regs, + mtk_ddp_write_relaxed(cmdq_pkt, overlay_pitch.split_pitch.lsb, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH(idx)); mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_SRC_SIZE(idx)); @@ -346,6 +374,20 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx, mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_ADDR(ovl, idx)); + if (is_afbc) { + mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_HDR_ADDR(ovl, idx)); + mtk_ddp_write_relaxed(cmdq_pkt, + OVL_PITCH_MSB_2ND_SUBBUF | overlay_pitch.split_pitch.msb, + &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); + mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs, + DISP_REG_OVL_HDR_PITCH(ovl, idx)); + } else { + mtk_ddp_write_relaxed(cmdq_pkt, + overlay_pitch.split_pitch.msb, + &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx)); + } + mtk_ovl_layer_on(dev, idx, cmdq_pkt); } @@ -492,6 +534,15 @@ static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = { .smi_id_en = true, }; +static const struct mtk_disp_ovl_data mt8195_ovl_driver_data = { + .addr = DISP_REG_OVL_ADDR_MT8173, + .gmc_bits = 10, + .layer_nr = 4, + .fmt_rgb565_is_0 = true, + .smi_id_en = true, + .supports_afbc = true, +}; + static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = { { .compatible = "mediatek,mt2701-disp-ovl", .data = &mt2701_ovl_driver_data}, @@ -505,6 +556,8 @@ static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = { .data = &mt8192_ovl_driver_data}, { .compatible = "mediatek,mt8192-disp-ovl-2l", .data = &mt8192_ovl_2l_driver_data}, + { .compatible = "mediatek,mt8195-disp-ovl", + .data = &mt8195_ovl_driver_data}, {}, }; MODULE_DEVICE_TABLE(of, mtk_disp_ovl_driver_dt_match); diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 508a6d994e83..4317595a15d1 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -461,9 +461,6 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi) if (--dpi->refcount != 0) return; - if (dpi->pinctrl && dpi->pins_gpio) - pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio); - mtk_dpi_disable(dpi); clk_disable_unprepare(dpi->pixel_clk); clk_disable_unprepare(dpi->engine_clk); @@ -488,9 +485,6 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) goto err_pixel; } - if (dpi->pinctrl && dpi->pins_dpi) - pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi); - return 0; err_pixel: @@ -721,12 +715,18 @@ static void mtk_dpi_bridge_disable(struct drm_bridge *bridge) struct mtk_dpi *dpi = bridge_to_dpi(bridge); mtk_dpi_power_off(dpi); + + if (dpi->pinctrl && dpi->pins_gpio) + pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio); } static void mtk_dpi_bridge_enable(struct drm_bridge *bridge) { struct mtk_dpi *dpi = bridge_to_dpi(bridge); + if (dpi->pinctrl && dpi->pins_dpi) + pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi); + mtk_dpi_power_on(dpi); mtk_dpi_set_display_mode(dpi, &dpi->mode); mtk_dpi_enable(dpi); @@ -929,6 +929,20 @@ static const struct mtk_dpi_conf mt8183_conf = { .csc_enable_bit = CSC_ENABLE, }; +static const struct mtk_dpi_conf mt8188_dpintf_conf = { + .cal_factor = mt8195_dpintf_calculate_factor, + .max_clock_khz = 600000, + .output_fmts = mt8195_output_fmts, + .num_output_fmts = ARRAY_SIZE(mt8195_output_fmts), + .pixels_per_iter = 4, + .input_2pixel = false, + .dimension_mask = DPINTF_HPW_MASK, + .hvsize_mask = DPINTF_HSIZE_MASK, + .channel_swap_shift = DPINTF_CH_SWAP, + .yuv422_en_bit = DPINTF_YUV422_EN, + .csc_enable_bit = DPINTF_CSC_ENABLE, +}; + static const struct mtk_dpi_conf mt8192_conf = { .cal_factor = mt8183_calculate_factor, .reg_h_fre_con = 0xe0, @@ -1079,6 +1093,9 @@ static const struct of_device_id mtk_dpi_of_ids[] = { { .compatible = "mediatek,mt8183-dpi", .data = &mt8183_conf, }, + { .compatible = "mediatek,mt8188-dp-intf", + .data = &mt8188_dpintf_conf, + }, { .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf, }, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 39a42dc8fb85..cd5b18ef7951 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -387,6 +387,12 @@ static int mtk_drm_kms_init(struct drm_device *drm) goto put_mutex_dev; /* + * Ensure internal panels are at the top of the connector list before + * crtc creation. + */ + drm_helper_move_panel_connectors_to_head(drm); + + /* * We currently support two fixed data streams, each optional, * and each statically assigned to a crtc: * OVL0 -> COLOR0 -> AAL -> OD -> RDMA0 -> UFOE -> DSI0 ... @@ -631,6 +637,8 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void *)MTK_DPI }, { .compatible = "mediatek,mt8183-dpi", .data = (void *)MTK_DPI }, + { .compatible = "mediatek,mt8188-dp-intf", + .data = (void *)MTK_DP_INTF }, { .compatible = "mediatek,mt8192-dpi", .data = (void *)MTK_DPI }, { .compatible = "mediatek,mt8195-dp-intf", diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 2f5e007dd380..d54fbf34b000 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -11,6 +11,7 @@ #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_atomic_helper.h> +#include <linux/align.h> #include "mtk_drm_crtc.h" #include "mtk_drm_ddp_comp.h" @@ -32,6 +33,14 @@ static const u32 formats[] = { DRM_FORMAT_YUYV, }; +static const u64 modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | + AFBC_FORMAT_MOD_SPLIT | + AFBC_FORMAT_MOD_SPARSE), + DRM_FORMAT_MOD_INVALID, +}; + static void mtk_plane_reset(struct drm_plane *plane) { struct mtk_plane_state *state; @@ -51,6 +60,7 @@ static void mtk_plane_reset(struct drm_plane *plane) state->base.plane = plane; state->pending.format = DRM_FORMAT_RGB565; + state->pending.modifier = DRM_FORMAT_MOD_LINEAR; } static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane) @@ -71,6 +81,32 @@ static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane return &state->base; } +static bool mtk_plane_format_mod_supported(struct drm_plane *plane, + uint32_t format, + uint64_t modifier) +{ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + + if (modifier != DRM_FORMAT_MOD_ARM_AFBC( + AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | + AFBC_FORMAT_MOD_SPLIT | + AFBC_FORMAT_MOD_SPARSE)) + return false; + + if (format != DRM_FORMAT_XRGB8888 && + format != DRM_FORMAT_ARGB8888 && + format != DRM_FORMAT_BGRX8888 && + format != DRM_FORMAT_BGRA8888 && + format != DRM_FORMAT_ABGR8888 && + format != DRM_FORMAT_XBGR8888 && + format != DRM_FORMAT_RGB888 && + format != DRM_FORMAT_BGR888) + return false; + + return true; +} + static void mtk_drm_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { @@ -119,21 +155,52 @@ static void mtk_plane_update_new_state(struct drm_plane_state *new_state, struct drm_gem_object *gem; struct mtk_drm_gem_obj *mtk_gem; unsigned int pitch, format; + u64 modifier; dma_addr_t addr; + dma_addr_t hdr_addr = 0; + unsigned int hdr_pitch = 0; gem = fb->obj[0]; mtk_gem = to_mtk_gem_obj(gem); addr = mtk_gem->dma_addr; pitch = fb->pitches[0]; format = fb->format->format; + modifier = fb->modifier; - addr += (new_state->src.x1 >> 16) * fb->format->cpp[0]; - addr += (new_state->src.y1 >> 16) * pitch; + if (modifier == DRM_FORMAT_MOD_LINEAR) { + addr += (new_state->src.x1 >> 16) * fb->format->cpp[0]; + addr += (new_state->src.y1 >> 16) * pitch; + } else { + int width_in_blocks = ALIGN(fb->width, AFBC_DATA_BLOCK_WIDTH) + / AFBC_DATA_BLOCK_WIDTH; + int height_in_blocks = ALIGN(fb->height, AFBC_DATA_BLOCK_HEIGHT) + / AFBC_DATA_BLOCK_HEIGHT; + int x_offset_in_blocks = (new_state->src.x1 >> 16) / AFBC_DATA_BLOCK_WIDTH; + int y_offset_in_blocks = (new_state->src.y1 >> 16) / AFBC_DATA_BLOCK_HEIGHT; + int hdr_size; + + hdr_pitch = width_in_blocks * AFBC_HEADER_BLOCK_SIZE; + pitch = width_in_blocks * AFBC_DATA_BLOCK_WIDTH * + AFBC_DATA_BLOCK_HEIGHT * fb->format->cpp[0]; + + hdr_size = ALIGN(hdr_pitch * height_in_blocks, AFBC_HEADER_ALIGNMENT); + + hdr_addr = addr + hdr_pitch * y_offset_in_blocks + + AFBC_HEADER_BLOCK_SIZE * x_offset_in_blocks; + /* The data plane is offset by 1 additional block. */ + addr = addr + hdr_size + + pitch * y_offset_in_blocks + + AFBC_DATA_BLOCK_WIDTH * AFBC_DATA_BLOCK_HEIGHT * + fb->format->cpp[0] * (x_offset_in_blocks + 1); + } mtk_plane_state->pending.enable = true; mtk_plane_state->pending.pitch = pitch; + mtk_plane_state->pending.hdr_pitch = hdr_pitch; mtk_plane_state->pending.format = format; + mtk_plane_state->pending.modifier = modifier; mtk_plane_state->pending.addr = addr; + mtk_plane_state->pending.hdr_addr = hdr_addr; mtk_plane_state->pending.x = new_state->dst.x1; mtk_plane_state->pending.y = new_state->dst.y1; mtk_plane_state->pending.width = drm_rect_width(&new_state->dst); @@ -172,6 +239,7 @@ static const struct drm_plane_funcs mtk_plane_funcs = { .reset = mtk_plane_reset, .atomic_duplicate_state = mtk_plane_duplicate_state, .atomic_destroy_state = mtk_drm_plane_destroy_state, + .format_mod_supported = mtk_plane_format_mod_supported, }; static int mtk_plane_atomic_check(struct drm_plane *plane, @@ -253,7 +321,7 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, err = drm_universal_plane_init(dev, plane, possible_crtcs, &mtk_plane_funcs, formats, - ARRAY_SIZE(formats), NULL, type, NULL); + ARRAY_SIZE(formats), modifiers, type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h index 2d5ec66e3df1..8f39011cdbfc 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h @@ -10,12 +10,20 @@ #include <drm/drm_crtc.h> #include <linux/types.h> +#define AFBC_DATA_BLOCK_WIDTH 32 +#define AFBC_DATA_BLOCK_HEIGHT 8 +#define AFBC_HEADER_BLOCK_SIZE 16 +#define AFBC_HEADER_ALIGNMENT 1024 + struct mtk_plane_pending_state { bool config; bool enable; dma_addr_t addr; + dma_addr_t hdr_addr; unsigned int pitch; + unsigned int hdr_pitch; unsigned int format; + unsigned long long modifier; unsigned int x; unsigned int y; unsigned int width; diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 12fa78f286ff..0a8e0a13f516 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1202,9 +1202,10 @@ static enum drm_connector_status mtk_hdmi_detect(struct mtk_hdmi *hdmi) return mtk_hdmi_update_plugged_status(hdmi); } -static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, - const struct drm_display_info *info, - const struct drm_display_mode *mode) +static enum drm_mode_status +mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *info, + const struct drm_display_mode *mode) { struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); struct drm_bridge *next_bridge; diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 4e0cbd682725..3c9dfdb0b328 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -155,7 +155,7 @@ config DRM_MSM_HDMI Compile in support for the HDMI output MSM DRM driver. It can be a primary or a secondary display on device. Note that this is used only for the direct HDMI output. If the device outputs HDMI data - throught some kind of DSI-to-HDMI bridge, this option can be disabled. + through some kind of DSI-to-HDMI bridge, this option can be disabled. config DRM_MSM_HDMI_HDCP bool "Enable HDMI HDCP support in MSM DRM driver" diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 2c8b9899625b..948785ed07bb 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -500,7 +500,7 @@ static const struct adreno_gpu_funcs funcs = { #endif .gpu_state_get = a3xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, - .create_address_space = adreno_iommu_create_address_space, + .create_address_space = adreno_create_address_space, .get_rptr = a3xx_get_rptr, }, }; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 7cb8d9849c07..3e09d3a7a0ac 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) { static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { - *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO, - REG_A4XX_RBBM_PERFCTR_CP_0_HI); + *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO); return 0; } @@ -635,7 +634,7 @@ static const struct adreno_gpu_funcs funcs = { #endif .gpu_state_get = a4xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, - .create_address_space = adreno_iommu_create_address_space, + .create_address_space = adreno_create_address_space, .get_rptr = a4xx_get_rptr, }, .get_timestamp = a4xx_get_timestamp, diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 3dcec7acb384..660ba0db8900 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu) a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo); } - gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, - REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova); + gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova); - gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, - REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova); + gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova); return 0; } @@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * memory rendering at this point in time and we don't want to block off * part of the virtual memory space. */ - gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); /* Put the GPU into 64 bit by default */ @@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) return ret; /* Set the ringbuffer address */ - gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI, - gpu->rb[0]->iova); + gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova); /* * If the microcode supports the WHERE_AM_I opcode then we can use that @@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) } gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR, - REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0])); + shadowptr(a5xx_gpu, gpu->rb[0])); } else if (gpu->nr_rings > 1) { /* Disable preemption if WHERE_AM_I isn't available */ a5xx_preempt_fini(gpu); @@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), - gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI), + gpu_read64(gpu, REG_A5XX_CP_IB1_BASE), gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ), - gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI), + gpu_read64(gpu, REG_A5XX_CP_IB2_BASE), gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); /* Turn off the hangcheck timer to keep it from bothering us */ @@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu) static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { - *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO, - REG_A5XX_RBBM_ALWAYSON_COUNTER_HI); + *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO); return 0; } @@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu, if (IS_ERR_OR_NULL(dumper->ptr)) return -EINVAL; - gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, - REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); + gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova); gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1); @@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) { u64 busy_cycles; - busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, - REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); + busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO); *out_sample_rate = clk_get_rate(gpu->core_clk); return busy_cycles; @@ -1705,7 +1698,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_busy = a5xx_gpu_busy, .gpu_state_get = a5xx_gpu_state_get, .gpu_state_put = a5xx_gpu_state_put, - .create_address_space = adreno_iommu_create_address_space, + .create_address_space = adreno_create_address_space, .get_rptr = a5xx_get_rptr, }, .get_timestamp = a5xx_get_timestamp, diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 8abc9a2b114a..7658e89844b4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) /* Set the address of the incoming preemption record */ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, - REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, a5xx_gpu->preempt_iova[ring->id]); a5xx_gpu->next_ring = ring; @@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu) } /* Write a 0 to signal that we aren't switching pagetables */ - gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, - REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0); /* Reset the preemption state */ set_preempt_state(a5xx_gpu, PREEMPT_NONE); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index e033d6a67a20..6484b97c5344 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1213,19 +1213,17 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo, static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) { - struct iommu_domain *domain; struct msm_mmu *mmu; - domain = iommu_domain_alloc(&platform_bus_type); - if (!domain) + mmu = msm_iommu_new(gmu->dev, 0); + if (!mmu) return -ENODEV; + if (IS_ERR(mmu)) + return PTR_ERR(mmu); - mmu = msm_iommu_new(gmu->dev, domain); gmu->aspace = msm_gem_address_space_create(mmu, "gmu", 0x0, 0x80000000); - if (IS_ERR(gmu->aspace)) { - iommu_domain_free(domain); + if (IS_ERR(gmu->aspace)) return PTR_ERR(gmu->aspace); - } return 0; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index fdc578016e0b..36c8fb699b56 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, submit->seqno); trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI)); + gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO)); a6xx_flush(gpu, ring); } @@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu) } } - gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, - REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova); + gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); return 0; } @@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu) * memory rendering at this point in time and we don't want to block off * part of the virtual memory space. */ - gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000); gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); /* Turn on 64 bit addressing for all blocks */ @@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu) if (!adreno_is_a650_family(adreno_gpu)) { /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ - gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, - REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000); + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO, - REG_A6XX_UCHE_GMEM_RANGE_MAX_HI, 0x00100000 + adreno_gpu->gmem - 1); } @@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu) goto out; /* Set the ringbuffer address */ - gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI, - gpu->rb[0]->iova); + gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); /* Targets that support extended APRIV can use the RPTR shadow from * hardware but all the other ones need to disable the feature. Targets @@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu) } gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO, - REG_A6XX_CP_RB_RPTR_ADDR_HI, shadowptr(a6xx_gpu, gpu->rb[0])); } @@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_RBBM_STATUS), gpu_read(gpu, REG_A6XX_CP_RB_RPTR), gpu_read(gpu, REG_A6XX_CP_RB_WPTR), - gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI), + gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), - gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI), + gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ @@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) /* Force the GPU power on so we can read this register */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI); + *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO); a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); @@ -1786,43 +1778,16 @@ a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct iommu_domain *iommu; - struct msm_mmu *mmu; - struct msm_gem_address_space *aspace; - u64 start, size; - - iommu = iommu_domain_alloc(&platform_bus_type); - if (!iommu) - return NULL; + unsigned long quirks = 0; /* * This allows GPU to set the bus attributes required to use system * cache on behalf of the iommu page table walker. */ if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) - adreno_set_llc_attributes(iommu); - - mmu = msm_iommu_new(&pdev->dev, iommu); - if (IS_ERR(mmu)) { - iommu_domain_free(iommu); - return ERR_CAST(mmu); - } - - /* - * Use the aperture start or SZ_16M, whichever is greater. This will - * ensure that we align with the allocated pagetable range while still - * allowing room in the lower 32 bits for GMEM and whatnot - */ - start = max_t(u64, SZ_16M, iommu->geometry.aperture_start); - size = iommu->geometry.aperture_end - start + 1; - - aspace = msm_gem_address_space_create(mmu, "gpu", - start & GENMASK_ULL(48, 0), size); + quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; - if (IS_ERR(aspace) && !IS_ERR(mmu)) - mmu->funcs->destroy(mmu); - - return aspace; + return adreno_iommu_create_address_space(gpu, pdev, quirks); } static struct msm_gem_address_space * @@ -1851,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } +static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct msm_cp_state cp_state = { + .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), + .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), + .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), + .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), + }; + bool progress; + + /* + * Adjust the remaining data to account for what has already been + * fetched from memory, but not yet consumed by the SQE. + * + * This is not *technically* correct, the amount buffered could + * exceed the IB size due to hw prefetching ahead, but: + * + * (1) We aren't trying to find the exact position, just whether + * progress has been made + * (2) The CP_REG_TO_MEM at the end of a submit should be enough + * to prevent prefetching into an unrelated submit. (And + * either way, at some point the ROQ will be full.) + */ + cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16; + cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16; + + progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); + + ring->last_cp_state = cp_state; + + return progress; +} + static u32 a618_get_speed_bin(u32 fuse) { if (fuse == 0) @@ -1906,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) if (val == UINT_MAX) { DRM_DEV_ERROR(dev, - "missing support for speed-bin: %u. Some OPPs may not be supported by hardware", + "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", fuse); return UINT_MAX; } @@ -1916,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) { - u32 supp_hw = UINT_MAX; + u32 supp_hw; u32 speedbin; int ret; @@ -1928,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) if (ret == -ENOENT) { return 0; } else if (ret) { - DRM_DEV_ERROR(dev, - "failed to read speed-bin (%d). Some OPPs may not be supported by hardware", - ret); - goto done; + dev_err_probe(dev, ret, + "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); + return ret; } supp_hw = fuse_to_supp_hw(dev, rev, speedbin); -done: ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); if (ret) return ret; @@ -1969,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = { .create_address_space = a6xx_create_address_space, .create_private_address_space = a6xx_create_private_address_space, .get_rptr = a6xx_get_rptr, + .progress = a6xx_progress, }, .get_timestamp = a6xx_get_timestamp, }; @@ -2005,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; - /* - * For now only clamp to idle freq for devices where this is known not - * to cause power supply issues: - */ - if (info && (info->revn == 618)) - gpu->clamp_to_idle = true; - a6xx_llc_slices_init(pdev, a6xx_gpu); ret = a6xx_set_supported_hw(&pdev->dev, config->rev); @@ -2026,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } + /* + * For now only clamp to idle freq for devices where this is known not + * to cause power supply issues: + */ + if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) + gpu->clamp_to_idle = true; + /* Check if there is a GMU phandle and set it up */ node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 55f443328d8e..a023d5f962dc 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -91,7 +91,7 @@ struct a6xx_state_memobj { static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize) { struct a6xx_state_memobj *obj = - kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); + kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); if (!obj) return NULL; @@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu, /* Make sure all pending memory writes are posted */ wmb(); - gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, - REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); + gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova); gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); @@ -813,6 +812,9 @@ static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo( { struct msm_gpu_state_bo *snapshot; + if (!bo->size) + return NULL; + snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot)); if (!snapshot) return NULL; @@ -1040,8 +1042,13 @@ static void a6xx_gpu_state_destroy(struct kref *kref) if (a6xx_state->gmu_hfi) kvfree(a6xx_state->gmu_hfi->data); - list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) - kfree(obj); + if (a6xx_state->gmu_debug) + kvfree(a6xx_state->gmu_debug->data); + + list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) { + list_del(&obj->node); + kvfree(obj); + } adreno_gpu_state_destroy(state); kfree(a6xx_state); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 24b489b6129a..628806423f7d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -679,6 +679,9 @@ static int adreno_system_suspend(struct device *dev) struct msm_gpu *gpu = dev_to_gpu(dev); int remaining, ret; + if (!gpu) + return 0; + suspend_scheduler(gpu); remaining = wait_event_timeout(gpu->retire_event, @@ -700,7 +703,12 @@ out: static int adreno_system_resume(struct device *dev) { - resume_scheduler(dev_to_gpu(dev)); + struct msm_gpu *gpu = dev_to_gpu(dev); + + if (!gpu) + return 0; + + resume_scheduler(gpu); return pm_runtime_force_resume(dev); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 382fb7f9e497..57586c794b84 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -191,37 +191,38 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid) return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid); } -void adreno_set_llc_attributes(struct iommu_domain *iommu) +struct msm_gem_address_space * +adreno_create_address_space(struct msm_gpu *gpu, + struct platform_device *pdev) { - iommu_set_pgtable_quirks(iommu, IO_PGTABLE_QUIRK_ARM_OUTER_WBWA); + return adreno_iommu_create_address_space(gpu, pdev, 0); } struct msm_gem_address_space * adreno_iommu_create_address_space(struct msm_gpu *gpu, - struct platform_device *pdev) + struct platform_device *pdev, + unsigned long quirks) { - struct iommu_domain *iommu; + struct iommu_domain_geometry *geometry; struct msm_mmu *mmu; struct msm_gem_address_space *aspace; u64 start, size; - iommu = iommu_domain_alloc(&platform_bus_type); - if (!iommu) - return NULL; - - mmu = msm_iommu_new(&pdev->dev, iommu); - if (IS_ERR(mmu)) { - iommu_domain_free(iommu); + mmu = msm_iommu_new(&pdev->dev, quirks); + if (IS_ERR_OR_NULL(mmu)) return ERR_CAST(mmu); - } + + geometry = msm_iommu_get_geometry(mmu); + if (IS_ERR(geometry)) + return ERR_CAST(geometry); /* * Use the aperture start or SZ_16M, whichever is greater. This will * ensure that we align with the allocated pagetable range while still * allowing room in the lower 32 bits for GMEM and whatnot */ - start = max_t(u64, SZ_16M, iommu->geometry.aperture_start); - size = iommu->geometry.aperture_end - start + 1; + start = max_t(u64, SZ_16M, geometry->aperture_start); + size = geometry->aperture_end - start + 1; aspace = msm_gem_address_space_create(mmu, "gpu", start & GENMASK_ULL(48, 0), size); @@ -729,7 +730,12 @@ static char *adreno_gpu_ascii85_encode(u32 *src, size_t len) return buf; } -/* len is expected to be in bytes */ +/* len is expected to be in bytes + * + * WARNING: *ptr should be allocated with kvmalloc or friends. It can be free'd + * with kvfree() and replaced with a newly kvmalloc'd buffer on the first call + * when the unencoded raw data is encoded + */ void adreno_show_object(struct drm_printer *p, void **ptr, int len, bool *encoded) { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e7adc5c632d0..5d4b1c95033f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -335,10 +335,13 @@ void adreno_show_object(struct drm_printer *p, void **ptr, int len, * attached targets */ struct msm_gem_address_space * -adreno_iommu_create_address_space(struct msm_gpu *gpu, - struct platform_device *pdev); +adreno_create_address_space(struct msm_gpu *gpu, + struct platform_device *pdev); -void adreno_set_llc_attributes(struct iommu_domain *iommu); +struct msm_gem_address_space * +adreno_iommu_create_address_space(struct msm_gpu *gpu, + struct platform_device *pdev, + unsigned long quirks); int adreno_read_speedbin(struct device *dev, u32 *speedbin); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index f436a1f3419d..d95540309d4d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -434,6 +434,12 @@ static const struct dpu_format dpu_format_map[] = { DPU_CHROMA_H2V1, DPU_FORMAT_FLAG_YUV, DPU_FETCH_LINEAR, 2), + PSEUDO_YUV_FMT_LOOSE(P010, + 0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, + C1_B_Cb, C2_R_Cr, + DPU_CHROMA_420, DPU_FORMAT_FLAG_DX | DPU_FORMAT_FLAG_YUV, + DPU_FETCH_LINEAR, 2), + INTERLEAVED_YUV_FMT(VYUY, 0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, C2_R_Cr, C0_G_Y, C1_B_Cb, C0_G_Y, @@ -524,12 +530,26 @@ static const struct dpu_format dpu_format_map_ubwc[] = { true, 4, DPU_FORMAT_FLAG_DX | DPU_FORMAT_FLAG_COMPRESSED, DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC), + INTERLEAVED_RGB_FMT_TILED(XRGB2101010, + COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, + C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4, + true, 4, DPU_FORMAT_FLAG_DX | DPU_FORMAT_FLAG_COMPRESSED, + DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC), + PSEUDO_YUV_FMT_TILED(NV12, 0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, C1_B_Cb, C2_R_Cr, DPU_CHROMA_420, DPU_FORMAT_FLAG_YUV | DPU_FORMAT_FLAG_COMPRESSED, DPU_FETCH_UBWC, 4, DPU_TILE_HEIGHT_NV12), + + PSEUDO_YUV_FMT_TILED(P010, + 0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, + C1_B_Cb, C2_R_Cr, + DPU_CHROMA_420, DPU_FORMAT_FLAG_DX | + DPU_FORMAT_FLAG_YUV | + DPU_FORMAT_FLAG_COMPRESSED, + DPU_FETCH_UBWC, 4, DPU_TILE_HEIGHT_UBWC), }; /* _dpu_get_v_h_subsample_rate - Get subsample rates for all formats we support @@ -571,13 +591,15 @@ static int _dpu_format_get_media_color_ubwc(const struct dpu_format *fmt) {DRM_FORMAT_XBGR8888, COLOR_FMT_RGBA8888_UBWC}, {DRM_FORMAT_XRGB8888, COLOR_FMT_RGBA8888_UBWC}, {DRM_FORMAT_ABGR2101010, COLOR_FMT_RGBA1010102_UBWC}, + {DRM_FORMAT_XRGB2101010, COLOR_FMT_RGBA1010102_UBWC}, {DRM_FORMAT_XBGR2101010, COLOR_FMT_RGBA1010102_UBWC}, {DRM_FORMAT_BGR565, COLOR_FMT_RGB565_UBWC}, }; int color_fmt = -1; int i; - if (fmt->base.pixel_format == DRM_FORMAT_NV12) { + if (fmt->base.pixel_format == DRM_FORMAT_NV12 || + fmt->base.pixel_format == DRM_FORMAT_P010) { if (DPU_FORMAT_IS_DX(fmt)) { if (fmt->unpack_tight) color_fmt = COLOR_FMT_NV12_BPP10_UBWC; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 27f029fdc682..2196e205efa5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -156,6 +156,7 @@ static const uint32_t plane_formats[] = { DRM_FORMAT_RGBX8888, DRM_FORMAT_BGRX8888, DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB2101010, DRM_FORMAT_RGB888, DRM_FORMAT_BGR888, DRM_FORMAT_RGB565, @@ -184,6 +185,7 @@ static const uint32_t plane_formats_yuv[] = { DRM_FORMAT_RGBA8888, DRM_FORMAT_BGRX8888, DRM_FORMAT_BGRA8888, + DRM_FORMAT_XRGB2101010, DRM_FORMAT_XRGB8888, DRM_FORMAT_XBGR8888, DRM_FORMAT_RGBX8888, @@ -208,6 +210,7 @@ static const uint32_t plane_formats_yuv[] = { DRM_FORMAT_RGBX4444, DRM_FORMAT_BGRX4444, + DRM_FORMAT_P010, DRM_FORMAT_NV12, DRM_FORMAT_NV21, DRM_FORMAT_NV16, @@ -318,6 +321,18 @@ static const struct dpu_caps sc7180_dpu_caps = { .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, }; +static const struct dpu_caps sm6115_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0x4, + .qseed_type = DPU_SSPP_SCALER_QSEED3LITE, + .smart_dma_rev = DPU_SSPP_SMART_DMA_V2, /* TODO: v2.5 */ + .ubwc_version = DPU_HW_UBWC_VER_20, + .has_dim_layer = true, + .has_idle_pc = true, + .max_linewidth = 2160, + .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, +}; + static const struct dpu_caps sm8150_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, @@ -472,6 +487,19 @@ static const struct dpu_mdp_cfg sc8180x_mdp[] = { }, }; +static const struct dpu_mdp_cfg sm6115_mdp[] = { + { + .name = "top_0", .id = MDP_TOP, + .base = 0x0, .len = 0x494, + .features = 0, + .highest_bank_bit = 0x1, + .clk_ctrls[DPU_CLK_CTRL_VIG0] = { + .reg_off = 0x2ac, .bit_off = 0}, + .clk_ctrls[DPU_CLK_CTRL_DMA0] = { + .reg_off = 0x2ac, .bit_off = 8}, + }, +}; + static const struct dpu_mdp_cfg sm8250_mdp[] = { { .name = "top_0", .id = MDP_TOP, @@ -849,6 +877,16 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR1), }; +static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 = + _VIG_SBLK("0", 2, DPU_SSPP_SCALER_QSEED3LITE); + +static const struct dpu_sspp_cfg sm6115_sspp[] = { + SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SM8250_MASK, + sm6115_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0), + SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, DMA_SDM845_MASK, + sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0), +}; + static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = _VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED3LITE); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = @@ -1175,6 +1213,13 @@ static const struct dpu_pingpong_cfg sm8150_pp[] = { -1), }; +static const struct dpu_pingpong_cfg sc7280_pp[] = { + PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1), + PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1), + PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1), + PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1), +}; + static struct dpu_pingpong_cfg qcm2290_pp[] = { PP_BLK("pingpong_0", PINGPONG_0, 0x70000, 0, sdm845_pp_sblk, DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), @@ -1198,13 +1243,6 @@ static const struct dpu_merge_3d_cfg sm8150_merge_3d[] = { MERGE_3D_BLK("merge_3d_2", MERGE_3D_2, 0x83200), }; -static const struct dpu_pingpong_cfg sc7280_pp[] = { - PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1), - PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1), - PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1), - PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1), -}; - /************************************************************* * DSC sub blocks config *************************************************************/ @@ -1587,6 +1625,35 @@ static const struct dpu_perf_cfg sc7180_perf_data = { .bw_inefficiency_factor = 120, }; +static const struct dpu_perf_cfg sm6115_perf_data = { + .max_bw_low = 3100000, + .max_bw_high = 4000000, + .min_core_ib = 2400000, + .min_llcc_ib = 800000, + .min_dram_ib = 800000, + .min_prefill_lines = 24, + .danger_lut_tbl = {0xff, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff0, 0xff00, 0xffff}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sc7180_qos_linear), + .entries = sc7180_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + /* TODO: macrotile-qseed is different from macrotile */ + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, + .clk_inefficiency_factor = 105, + .bw_inefficiency_factor = 120, +}; + static const struct dpu_perf_cfg sm8150_perf_data = { .max_bw_low = 12800000, .max_bw_high = 12800000, @@ -1798,6 +1865,28 @@ static const struct dpu_mdss_cfg sc7180_dpu_cfg = { .mdss_irqs = IRQ_SC7180_MASK, }; +static const struct dpu_mdss_cfg sm6115_dpu_cfg = { + .caps = &sm6115_dpu_caps, + .mdp_count = ARRAY_SIZE(sm6115_mdp), + .mdp = sm6115_mdp, + .ctl_count = ARRAY_SIZE(qcm2290_ctl), + .ctl = qcm2290_ctl, + .sspp_count = ARRAY_SIZE(sm6115_sspp), + .sspp = sm6115_sspp, + .mixer_count = ARRAY_SIZE(qcm2290_lm), + .mixer = qcm2290_lm, + .dspp_count = ARRAY_SIZE(qcm2290_dspp), + .dspp = qcm2290_dspp, + .pingpong_count = ARRAY_SIZE(qcm2290_pp), + .pingpong = qcm2290_pp, + .intf_count = ARRAY_SIZE(qcm2290_intf), + .intf = qcm2290_intf, + .vbif_count = ARRAY_SIZE(sdm845_vbif), + .vbif = sdm845_vbif, + .perf = &sm6115_perf_data, + .mdss_irqs = IRQ_SC7180_MASK, +}; + static const struct dpu_mdss_cfg sm8150_dpu_cfg = { .caps = &sm8150_dpu_caps, .mdp_count = ARRAY_SIZE(sdm845_mdp), @@ -1932,6 +2021,7 @@ static const struct dpu_mdss_hw_cfg_handler cfg_handler[] = { { .hw_rev = DPU_HW_VER_510, .dpu_cfg = &sc8180x_dpu_cfg}, { .hw_rev = DPU_HW_VER_600, .dpu_cfg = &sm8250_dpu_cfg}, { .hw_rev = DPU_HW_VER_620, .dpu_cfg = &sc7180_dpu_cfg}, + { .hw_rev = DPU_HW_VER_630, .dpu_cfg = &sm6115_dpu_cfg}, { .hw_rev = DPU_HW_VER_650, .dpu_cfg = &qcm2290_dpu_cfg}, { .hw_rev = DPU_HW_VER_720, .dpu_cfg = &sc7280_dpu_cfg}, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index 38aa38ab1568..3b645d5aa9aa 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -44,6 +44,7 @@ #define DPU_HW_VER_510 DPU_HW_VER(5, 1, 1) /* sc8180 */ #define DPU_HW_VER_600 DPU_HW_VER(6, 0, 0) /* sm8250 */ #define DPU_HW_VER_620 DPU_HW_VER(6, 2, 0) /* sc7180 v1.0 */ +#define DPU_HW_VER_630 DPU_HW_VER(6, 3, 0) /* sm6115|sm4250 */ #define DPU_HW_VER_650 DPU_HW_VER(6, 5, 0) /* qcm2290|sm4125 */ #define DPU_HW_VER_720 DPU_HW_VER(7, 2, 0) /* sc7280 */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index f2ddcfb6f7ee..3662df698dae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -42,7 +42,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, u32 initial_lines) { struct dpu_hw_blk_reg_map *c = &hw_dsc->hw; - u32 data, lsb, bpp; + u32 data; u32 slice_last_group_size; u32 det_thresh_flatness; bool is_cmd_mode = !(mode & DSC_MODE_VIDEO); @@ -56,14 +56,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, data = (initial_lines << 20); data |= ((slice_last_group_size - 1) << 18); /* bpp is 6.4 format, 4 LSBs bits are for fractional part */ - data |= dsc->bits_per_pixel << 12; - lsb = dsc->bits_per_pixel % 4; - bpp = dsc->bits_per_pixel / 4; - bpp *= 4; - bpp <<= 4; - bpp |= lsb; - - data |= bpp << 8; + data |= (dsc->bits_per_pixel << 8); data |= (dsc->block_pred_enable << 7); data |= (dsc->line_buf_depth << 3); data |= (dsc->simple_422 << 2); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 5e6e2626151e..b71199511a52 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -194,7 +194,7 @@ struct dpu_debugfs_regset32 { struct dpu_kms *dpu_kms; }; -static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data) +static int dpu_regset32_show(struct seq_file *s, void *data) { struct dpu_debugfs_regset32 *regset = s->private; struct dpu_kms *dpu_kms = regset->dpu_kms; @@ -227,19 +227,7 @@ static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data) return 0; } - -static int dpu_debugfs_open_regset32(struct inode *inode, - struct file *file) -{ - return single_open(file, _dpu_debugfs_show_regset32, inode->i_private); -} - -static const struct file_operations dpu_fops_regset32 = { - .open = dpu_debugfs_open_regset32, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(dpu_regset32); void dpu_debugfs_create_regset32(const char *name, umode_t mode, void *parent, @@ -259,7 +247,7 @@ void dpu_debugfs_create_regset32(const char *name, umode_t mode, regset->blk_len = length; regset->dpu_kms = dpu_kms; - debugfs_create_file(name, mode, parent, regset, &dpu_fops_regset32); + debugfs_create_file(name, mode, parent, regset, &dpu_regset32_fops); } static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) @@ -1304,6 +1292,7 @@ static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,sc7180-dpu", }, { .compatible = "qcom,sc7280-dpu", }, { .compatible = "qcom,sc8180x-dpu", }, + { .compatible = "qcom,sm6115-dpu", }, { .compatible = "qcom,sm8150-dpu", }, { .compatible = "qcom,sm8250-dpu", }, {} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 658005f609f4..86719020afe2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -69,9 +69,11 @@ static const uint32_t qcom_compressed_supported_formats[] = { DRM_FORMAT_ARGB8888, DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, + DRM_FORMAT_XRGB2101010, DRM_FORMAT_BGR565, DRM_FORMAT_NV12, + DRM_FORMAT_P010, }; /** diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 964573d26d26..9a1a0769575d 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -387,7 +387,7 @@ static int mdp4_kms_init(struct drm_device *dev) struct msm_drm_private *priv = dev->dev_private; struct mdp4_kms *mdp4_kms; struct msm_kms *kms = NULL; - struct iommu_domain *iommu; + struct msm_mmu *mmu; struct msm_gem_address_space *aspace; int irq, ret; u32 major, minor; @@ -499,10 +499,15 @@ static int mdp4_kms_init(struct drm_device *dev) mdp4_disable(mdp4_kms); mdelay(16); - iommu = iommu_domain_alloc(pdev->dev.bus); - if (iommu) { - struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu); - + mmu = msm_iommu_new(&pdev->dev, 0); + if (IS_ERR(mmu)) { + ret = PTR_ERR(mmu); + goto fail; + } else if (!mmu) { + DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys " + "contig buffers for scanout\n"); + aspace = NULL; + } else { aspace = msm_gem_address_space_create(mmu, "mdp4", 0x1000, 0x100000000 - 0x1000); @@ -514,10 +519,6 @@ static int mdp4_kms_init(struct drm_device *dev) } kms->aspace = aspace; - } else { - DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys " - "contig buffers for scanout\n"); - aspace = NULL; } ret = modeset_init(mdp4_kms); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c index 7288041dd86a..7444b75c4215 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c @@ -56,8 +56,9 @@ static int mdp4_lvds_connector_get_modes(struct drm_connector *connector) return ret; } -static int mdp4_lvds_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +mdp4_lvds_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { struct mdp4_lvds_connector *mdp4_lvds_connector = to_mdp4_lvds_connector(connector); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index b0d21838a134..29ae5c9613f3 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -203,7 +203,7 @@ static int mdp5_set_split_display(struct msm_kms *kms, slave_encoder); } -static void mdp5_destroy(struct platform_device *pdev); +static void mdp5_destroy(struct mdp5_kms *mdp5_kms); static void mdp5_kms_destroy(struct msm_kms *kms) { @@ -223,7 +223,7 @@ static void mdp5_kms_destroy(struct msm_kms *kms) } mdp_kms_destroy(&mdp5_kms->base); - mdp5_destroy(mdp5_kms->pdev); + mdp5_destroy(mdp5_kms); } #ifdef CONFIG_DEBUG_FS @@ -559,6 +559,8 @@ static int mdp5_kms_init(struct drm_device *dev) int irq, i, ret; ret = mdp5_init(to_platform_device(dev->dev), dev); + if (ret) + return ret; /* priv->kms would have been populated by the MDP5 driver */ kms = priv->kms; @@ -632,9 +634,8 @@ fail: return ret; } -static void mdp5_destroy(struct platform_device *pdev) +static void mdp5_destroy(struct mdp5_kms *mdp5_kms) { - struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev); int i; if (mdp5_kms->ctlm) @@ -648,7 +649,7 @@ static void mdp5_destroy(struct platform_device *pdev) kfree(mdp5_kms->intfs[i]); if (mdp5_kms->rpm_enabled) - pm_runtime_disable(&pdev->dev); + pm_runtime_disable(&mdp5_kms->pdev->dev); drm_atomic_private_obj_fini(&mdp5_kms->glob_state); drm_modeset_lock_fini(&mdp5_kms->glob_state_lock); @@ -797,8 +798,6 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) goto fail; } - platform_set_drvdata(pdev, mdp5_kms); - spin_lock_init(&mdp5_kms->resource_lock); mdp5_kms->dev = dev; @@ -839,6 +838,9 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) */ clk_set_rate(mdp5_kms->core_clk, 200000000); + /* set uninit-ed kms */ + priv->kms = &mdp5_kms->base.base; + pm_runtime_enable(&pdev->dev); mdp5_kms->rpm_enabled = true; @@ -890,13 +892,10 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) if (ret) goto fail; - /* set uninit-ed kms */ - priv->kms = &mdp5_kms->base.base; - return 0; fail: if (mdp5_kms) - mdp5_destroy(pdev); + mdp5_destroy(mdp5_kms); return ret; } @@ -953,7 +952,8 @@ static int mdp5_dev_remove(struct platform_device *pdev) static __maybe_unused int mdp5_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev); + struct msm_drm_private *priv = platform_get_drvdata(pdev); + struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); DBG(""); @@ -963,7 +963,8 @@ static __maybe_unused int mdp5_runtime_suspend(struct device *dev) static __maybe_unused int mdp5_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev); + struct msm_drm_private *priv = platform_get_drvdata(pdev); + struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); DBG(""); diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 3854c9f1f7e9..dd26ca651a05 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1243,8 +1243,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, { int ret = 0; const u8 *dpcd = ctrl->panel->dpcd; - u8 encoding = DP_SET_ANSI_8B10B; - u8 ssc; + u8 encoding[] = { 0, DP_SET_ANSI_8B10B }; u8 assr; struct dp_link_info link_info = {0}; @@ -1256,13 +1255,11 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, dp_aux_link_configure(ctrl->aux, &link_info); - if (drm_dp_max_downspread(dpcd)) { - ssc = DP_SPREAD_AMP_0_5; - drm_dp_dpcd_write(ctrl->aux, DP_DOWNSPREAD_CTRL, &ssc, 1); - } + if (drm_dp_max_downspread(dpcd)) + encoding[0] |= DP_SPREAD_AMP_0_5; - drm_dp_dpcd_write(ctrl->aux, DP_MAIN_LINK_CHANNEL_CODING_SET, - &encoding, 1); + /* config DOWNSPREAD_CTRL and MAIN_LINK_CHANNEL_CODING_SET */ + drm_dp_dpcd_write(ctrl->aux, DP_DOWNSPREAD_CTRL, encoding, 2); if (drm_dp_alternate_scrambler_reset_cap(dpcd)) { assr = DP_ALTERNATE_SCRAMBLER_RESET_ENABLE; diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 39c9e55f07cc..7ff60e5ff325 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1249,7 +1249,7 @@ int dp_display_request_irq(struct msm_dp *dp_display) return -EINVAL; } - rc = devm_request_irq(&dp->pdev->dev, dp->irq, + rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq, dp_display_irq_handler, IRQF_TRIGGER_HIGH, "dp_display_isr", dp); if (rc < 0) { @@ -1528,6 +1528,11 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor) } } +static void of_dp_aux_depopulate_bus_void(void *data) +{ + of_dp_aux_depopulate_bus(data); +} + static int dp_display_get_next_bridge(struct msm_dp *dp) { int rc; @@ -1552,10 +1557,16 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) * panel driver is probed asynchronously but is the best we * can do without a bigger driver reorganization. */ - rc = devm_of_dp_aux_populate_ep_devices(dp_priv->aux); + rc = of_dp_aux_populate_bus(dp_priv->aux, NULL); of_node_put(aux_bus); if (rc) goto error; + + rc = devm_add_action_or_reset(dp->drm_dev->dev, + of_dp_aux_depopulate_bus_void, + dp_priv->aux); + if (rc) + goto error; } else if (dp->is_edp) { DRM_ERROR("eDP aux_bus not found\n"); return -ENODEV; @@ -1568,7 +1579,7 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) * For DisplayPort interfaces external bridges are optional, so * silently ignore an error if one is not present (-ENODEV). */ - rc = dp_parser_find_next_bridge(dp_priv->parser); + rc = devm_dp_parser_find_next_bridge(dp->drm_dev->dev, dp_priv->parser); if (!dp->is_edp && rc == -ENODEV) return 0; @@ -1589,14 +1600,10 @@ error: int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, struct drm_encoder *encoder) { - struct msm_drm_private *priv; + struct msm_drm_private *priv = dev->dev_private; struct dp_display_private *dp_priv; int ret; - if (WARN_ON(!encoder) || WARN_ON(!dp_display) || WARN_ON(!dev)) - return -EINVAL; - - priv = dev->dev_private; dp_display->drm_dev = dev; dp_priv = container_of(dp_display, struct dp_display_private, dp_display); diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 6df25f7662e7..6db82f9b03af 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -31,6 +31,36 @@ static enum drm_connector_status dp_bridge_detect(struct drm_bridge *bridge) connector_status_disconnected; } +static int dp_bridge_atomic_check(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct msm_dp *dp; + + dp = to_dp_bridge(bridge)->dp_display; + + drm_dbg_dp(dp->drm_dev, "is_connected = %s\n", + (dp->is_connected) ? "true" : "false"); + + /* + * There is no protection in the DRM framework to check if the display + * pipeline has been already disabled before trying to disable it again. + * Hence if the sink is unplugged, the pipeline gets disabled, but the + * crtc->active is still true. Any attempt to set the mode or manually + * disable this encoder will result in the crash. + * + * TODO: add support for telling the DRM subsystem that the pipeline is + * disabled by the hardware and thus all access to it should be forbidden. + * After that this piece of code can be removed. + */ + if (bridge->ops & DRM_BRIDGE_OP_HPD) + return (dp->is_connected) ? 0 : -ENOTCONN; + + return 0; +} + + /** * dp_bridge_get_modes - callback to add drm modes via drm_mode_probed_add() * @bridge: Poiner to drm bridge @@ -61,6 +91,9 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector * } static const struct drm_bridge_funcs dp_bridge_ops = { + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, .enable = dp_bridge_enable, .disable = dp_bridge_disable, .post_disable = dp_bridge_post_disable, @@ -68,6 +101,7 @@ static const struct drm_bridge_funcs dp_bridge_ops = { .mode_valid = dp_bridge_mode_valid, .get_modes = dp_bridge_get_modes, .detect = dp_bridge_detect, + .atomic_check = dp_bridge_atomic_check, }; struct drm_bridge *dp_bridge_init(struct msm_dp *dp_display, struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 36bb6191d2f0..f1f1d646539d 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -49,23 +49,26 @@ static int dp_aux_link_power_up(struct drm_dp_aux *aux, struct dp_link_info *link) { u8 value; - int err; + ssize_t len; + int i; if (link->revision < 0x11) return 0; - err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); - if (err < 0) - return err; + len = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); + if (len < 0) + return len; value &= ~DP_SET_POWER_MASK; value |= DP_SET_POWER_D0; - err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); - if (err < 0) - return err; - - usleep_range(1000, 2000); + /* retry for 1ms to give the sink time to wake up */ + for (i = 0; i < 3; i++) { + len = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); + usleep_range(1000, 2000); + if (len == 1) + break; + } return 0; } diff --git a/drivers/gpu/drm/msm/dp/dp_parser.c b/drivers/gpu/drm/msm/dp/dp_parser.c index dd732215d55b..dcbe893d66d7 100644 --- a/drivers/gpu/drm/msm/dp/dp_parser.c +++ b/drivers/gpu/drm/msm/dp/dp_parser.c @@ -240,12 +240,12 @@ static int dp_parser_clock(struct dp_parser *parser) return 0; } -int dp_parser_find_next_bridge(struct dp_parser *parser) +int devm_dp_parser_find_next_bridge(struct device *dev, struct dp_parser *parser) { - struct device *dev = &parser->pdev->dev; + struct platform_device *pdev = parser->pdev; struct drm_bridge *bridge; - bridge = devm_drm_of_get_bridge(dev, dev->of_node, 1, 0); + bridge = devm_drm_of_get_bridge(dev, pdev->dev.of_node, 1, 0); if (IS_ERR(bridge)) return PTR_ERR(bridge); diff --git a/drivers/gpu/drm/msm/dp/dp_parser.h b/drivers/gpu/drm/msm/dp/dp_parser.h index 866c1a82bf1a..d30ab773db46 100644 --- a/drivers/gpu/drm/msm/dp/dp_parser.h +++ b/drivers/gpu/drm/msm/dp/dp_parser.h @@ -138,8 +138,9 @@ struct dp_parser { struct dp_parser *dp_parser_get(struct platform_device *pdev); /** - * dp_parser_find_next_bridge() - find an additional bridge to DP + * devm_dp_parser_find_next_bridge() - find an additional bridge to DP * + * @dev: device to tie bridge lifetime to * @parser: dp_parser data from client * * This function is used to find any additional bridge attached to @@ -147,6 +148,6 @@ struct dp_parser *dp_parser_get(struct platform_device *pdev); * * Return: 0 if able to get the bridge, otherwise negative errno for failure. */ -int dp_parser_find_next_bridge(struct dp_parser *parser); +int devm_dp_parser_find_next_bridge(struct device *dev, struct dp_parser *parser); #endif diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 39bbabb5daf6..31fdee2052be 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -211,13 +211,14 @@ void __exit msm_dsi_unregister(void) int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, struct drm_encoder *encoder) { - struct msm_drm_private *priv; + struct msm_drm_private *priv = dev->dev_private; int ret; - if (WARN_ON(!encoder) || WARN_ON(!msm_dsi) || WARN_ON(!dev)) - return -EINVAL; + if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) { + DRM_DEV_ERROR(dev->dev, "too many bridges\n"); + return -ENOSPC; + } - priv = dev->dev_private; msm_dsi->dev = dev; ret = msm_dsi_host_modeset_init(msm_dsi->host, dev); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 7fbf391c024f..89aadd3b3202 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -21,6 +21,7 @@ #include <video/mipi_display.h> +#include <drm/display/drm_dsc_helper.h> #include <drm/drm_of.h> #include "dsi.h" @@ -33,7 +34,7 @@ #define DSI_RESET_TOGGLE_DELAY_MS 20 -static int dsi_populate_dsc_params(struct drm_dsc_config *dsc); +static int dsi_populate_dsc_params(struct msm_dsi_host *msm_host, struct drm_dsc_config *dsc); static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { @@ -842,17 +843,15 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mode, u32 hdisplay) { struct drm_dsc_config *dsc = msm_host->dsc; - u32 reg, intf_width, reg_ctrl, reg_ctrl2; + u32 reg, reg_ctrl, reg_ctrl2; u32 slice_per_intf, total_bytes_per_intf; u32 pkt_per_line; - u32 bytes_in_slice; u32 eol_byte_num; /* first calculate dsc parameters and then program * compress mode registers */ - intf_width = hdisplay; - slice_per_intf = DIV_ROUND_UP(intf_width, dsc->slice_width); + slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->slice_width); /* If slice_per_pkt is greater than slice_per_intf * then default to 1. This can happen during partial @@ -861,12 +860,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod if (slice_per_intf > dsc->slice_count) dsc->slice_count = 1; - slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->slice_width); - bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8); - - dsc->slice_chunk_size = bytes_in_slice; - - total_bytes_per_intf = bytes_in_slice * slice_per_intf; + total_bytes_per_intf = dsc->slice_chunk_size * slice_per_intf; eol_byte_num = total_bytes_per_intf % 3; pkt_per_line = slice_per_intf / dsc->slice_count; @@ -892,7 +886,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod reg_ctrl |= reg; reg_ctrl2 &= ~DSI_COMMAND_COMPRESSION_MODE_CTRL2_STREAM0_SLICE_WIDTH__MASK; - reg_ctrl2 |= DSI_COMMAND_COMPRESSION_MODE_CTRL2_STREAM0_SLICE_WIDTH(bytes_in_slice); + reg_ctrl2 |= DSI_COMMAND_COMPRESSION_MODE_CTRL2_STREAM0_SLICE_WIDTH(dsc->slice_chunk_size); dsi_write(msm_host, REG_DSI_COMMAND_COMPRESSION_MODE_CTRL, reg_ctrl); dsi_write(msm_host, REG_DSI_COMMAND_COMPRESSION_MODE_CTRL2, reg_ctrl2); @@ -915,6 +909,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) u32 va_end = va_start + mode->vdisplay; u32 hdisplay = mode->hdisplay; u32 wc; + int ret; DBG(""); @@ -950,7 +945,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) /* we do the calculations for dsc parameters here so that * panel can use these parameters */ - dsi_populate_dsc_params(dsc); + ret = dsi_populate_dsc_params(msm_host, dsc); + if (ret) + return; /* Divide the display by 3 but keep back/font porch and * pulse width same @@ -1754,18 +1751,20 @@ static char bpg_offset[DSC_NUM_BUF_RANGES] = { 2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12 }; -static int dsi_populate_dsc_params(struct drm_dsc_config *dsc) -{ - int mux_words_size; - int groups_per_line, groups_total; - int min_rate_buffer_size; - int hrd_delay; - int pre_num_extra_mux_bits, num_extra_mux_bits; - int slice_bits; - int target_bpp_x16; - int data; - int final_value, final_scale; +static int dsi_populate_dsc_params(struct msm_dsi_host *msm_host, struct drm_dsc_config *dsc) +{ int i; + u16 bpp = dsc->bits_per_pixel >> 4; + + if (dsc->bits_per_pixel & 0xf) { + DRM_DEV_ERROR(&msm_host->pdev->dev, "DSI does not support fractional bits_per_pixel\n"); + return -EINVAL; + } + + if (dsc->bits_per_component != 8) { + DRM_DEV_ERROR(&msm_host->pdev->dev, "DSI does not support bits_per_component != 8 yet\n"); + return -EOPNOTSUPP; + } dsc->rc_model_size = 8192; dsc->first_line_bpg_offset = 12; @@ -1783,16 +1782,21 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc) for (i = 0; i < DSC_NUM_BUF_RANGES; i++) { dsc->rc_range_params[i].range_min_qp = min_qp[i]; dsc->rc_range_params[i].range_max_qp = max_qp[i]; - dsc->rc_range_params[i].range_bpg_offset = bpg_offset[i]; + /* + * Range BPG Offset contains two's-complement signed values that fill + * 8 bits, yet the registers and DCS PPS field are only 6 bits wide. + */ + dsc->rc_range_params[i].range_bpg_offset = bpg_offset[i] & DSC_RANGE_BPG_OFFSET_MASK; } - dsc->initial_offset = 6144; /* Not bpp 12 */ - if (dsc->bits_per_pixel != 8) + dsc->initial_offset = 6144; /* Not bpp 12 */ + if (bpp != 8) dsc->initial_offset = 2048; /* bpp = 12 */ - mux_words_size = 48; /* bpc == 8/10 */ - if (dsc->bits_per_component == 12) - mux_words_size = 64; + if (dsc->bits_per_component <= 10) + dsc->mux_word_size = DSC_MUX_WORD_SIZE_8_10_BPC; + else + dsc->mux_word_size = DSC_MUX_WORD_SIZE_12_BPC; dsc->initial_xmit_delay = 512; dsc->initial_scale_value = 32; @@ -1804,63 +1808,8 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc) dsc->flatness_max_qp = 12; dsc->rc_quant_incr_limit0 = 11; dsc->rc_quant_incr_limit1 = 11; - dsc->mux_word_size = DSC_MUX_WORD_SIZE_8_10_BPC; - - /* FIXME: need to call drm_dsc_compute_rc_parameters() so that rest of - * params are calculated - */ - groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3); - dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8; - if ((dsc->slice_width * dsc->bits_per_pixel) % 8) - dsc->slice_chunk_size++; - /* rbs-min */ - min_rate_buffer_size = dsc->rc_model_size - dsc->initial_offset + - dsc->initial_xmit_delay * dsc->bits_per_pixel + - groups_per_line * dsc->first_line_bpg_offset; - - hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel); - - dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay; - - dsc->initial_scale_value = 8 * dsc->rc_model_size / - (dsc->rc_model_size - dsc->initial_offset); - - slice_bits = 8 * dsc->slice_chunk_size * dsc->slice_height; - - groups_total = groups_per_line * dsc->slice_height; - - data = dsc->first_line_bpg_offset * 2048; - - dsc->nfl_bpg_offset = DIV_ROUND_UP(data, (dsc->slice_height - 1)); - - pre_num_extra_mux_bits = 3 * (mux_words_size + (4 * dsc->bits_per_component + 4) - 2); - - num_extra_mux_bits = pre_num_extra_mux_bits - (mux_words_size - - ((slice_bits - pre_num_extra_mux_bits) % mux_words_size)); - - data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits); - dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total); - - /* bpp * 16 + 0.5 */ - data = dsc->bits_per_pixel * 16; - data *= 2; - data++; - data /= 2; - target_bpp_x16 = data; - - data = (dsc->initial_xmit_delay * target_bpp_x16) / 16; - final_value = dsc->rc_model_size - data + num_extra_mux_bits; - dsc->final_offset = final_value; - - final_scale = 8 * dsc->rc_model_size / (dsc->rc_model_size - final_value); - - data = (final_scale - 9) * (dsc->nfl_bpg_offset + dsc->slice_bpg_offset); - dsc->scale_increment_interval = (2048 * dsc->final_offset) / data; - - dsc->scale_decrement_interval = groups_per_line / (dsc->initial_scale_value - 8); - - return 0; + return drm_dsc_compute_rc_parameters(dsc); } static int dsi_host_parse_dt(struct msm_dsi_host *msm_host) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7fc0975cb869..ee6051367679 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -549,6 +549,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_14NM_PHY { .compatible = "qcom,dsi-phy-14nm", .data = &dsi_phy_14nm_cfgs }, + { .compatible = "qcom,dsi-phy-14nm-2290", + .data = &dsi_phy_14nm_2290_cfgs }, { .compatible = "qcom,dsi-phy-14nm-660", .data = &dsi_phy_14nm_660_cfgs }, { .compatible = "qcom,dsi-phy-14nm-8953", diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 60a99c6525b2..1096afedd616 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -50,6 +50,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_10nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_10nm_8998_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 0f8f4ca46429..9f488adea7f5 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -1081,3 +1081,20 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs = { .io_start = { 0x1a94400, 0x1a96400 }, .num_dsi_phy = 2, }; + +const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs = { + .has_phy_lane = true, + .regulator_data = dsi_phy_14nm_17mA_regulators, + .num_regulators = ARRAY_SIZE(dsi_phy_14nm_17mA_regulators), + .ops = { + .enable = dsi_14nm_phy_enable, + .disable = dsi_14nm_phy_disable, + .pll_init = dsi_pll_14nm_init, + .save_pll_state = dsi_14nm_pll_save_state, + .restore_pll_state = dsi_14nm_pll_restore_state, + }, + .min_pll_rate = VCO_MIN_RATE, + .max_pll_rate = VCO_MAX_RATE, + .io_start = { 0x5e94400 }, + .num_dsi_phy = 1, +}; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 93fe61b86967..4d3fdc806bef 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -68,16 +68,17 @@ static void msm_hdmi_destroy(struct hdmi *hdmi) destroy_workqueue(hdmi->workq); msm_hdmi_hdcp_destroy(hdmi); + if (hdmi->i2c) + msm_hdmi_i2c_destroy(hdmi->i2c); +} + +static void msm_hdmi_put_phy(struct hdmi *hdmi) +{ if (hdmi->phy_dev) { put_device(hdmi->phy_dev); hdmi->phy = NULL; hdmi->phy_dev = NULL; } - - if (hdmi->i2c) - msm_hdmi_i2c_destroy(hdmi->i2c); - - platform_set_drvdata(hdmi->pdev, NULL); } static int msm_hdmi_get_phy(struct hdmi *hdmi) @@ -93,22 +94,18 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) } phy_pdev = of_find_device_by_node(phy_node); - if (phy_pdev) - hdmi->phy = platform_get_drvdata(phy_pdev); - of_node_put(phy_node); - if (!phy_pdev) { - DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); - return -EPROBE_DEFER; - } + if (!phy_pdev) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "phy driver is not ready\n"); + + hdmi->phy = platform_get_drvdata(phy_pdev); if (!hdmi->phy) { - DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); put_device(&phy_pdev->dev); - return -EPROBE_DEFER; + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "phy driver is not ready\n"); } - hdmi->phy_dev = get_device(&phy_pdev->dev); + hdmi->phy_dev = &phy_pdev->dev; return 0; } @@ -117,142 +114,10 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) * we are to EPROBE_DEFER we want to do it here, rather than later * at modeset_init() time */ -static struct hdmi *msm_hdmi_init(struct platform_device *pdev) +static int msm_hdmi_init(struct hdmi *hdmi) { - struct hdmi_platform_config *config = pdev->dev.platform_data; - struct hdmi *hdmi = NULL; - struct resource *res; - int i, ret; - - hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); - if (!hdmi) { - ret = -ENOMEM; - goto fail; - } - - hdmi->pdev = pdev; - hdmi->config = config; - spin_lock_init(&hdmi->reg_lock); - - ret = drm_of_find_panel_or_bridge(pdev->dev.of_node, 1, 0, NULL, &hdmi->next_bridge); - if (ret && ret != -ENODEV) - goto fail; - - hdmi->mmio = msm_ioremap(pdev, config->mmio_name); - if (IS_ERR(hdmi->mmio)) { - ret = PTR_ERR(hdmi->mmio); - goto fail; - } - - /* HDCP needs physical address of hdmi register */ - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, - config->mmio_name); - if (!res) { - ret = -EINVAL; - goto fail; - } - hdmi->mmio_phy_addr = res->start; - - hdmi->qfprom_mmio = msm_ioremap(pdev, config->qfprom_mmio_name); - if (IS_ERR(hdmi->qfprom_mmio)) { - DRM_DEV_INFO(&pdev->dev, "can't find qfprom resource\n"); - hdmi->qfprom_mmio = NULL; - } - - hdmi->hpd_regs = devm_kcalloc(&pdev->dev, - config->hpd_reg_cnt, - sizeof(hdmi->hpd_regs[0]), - GFP_KERNEL); - if (!hdmi->hpd_regs) { - ret = -ENOMEM; - goto fail; - } - for (i = 0; i < config->hpd_reg_cnt; i++) - hdmi->hpd_regs[i].supply = config->hpd_reg_names[i]; - - ret = devm_regulator_bulk_get(&pdev->dev, config->hpd_reg_cnt, hdmi->hpd_regs); - if (ret) { - DRM_DEV_ERROR(&pdev->dev, "failed to get hpd regulator: %d\n", ret); - goto fail; - } - - hdmi->pwr_regs = devm_kcalloc(&pdev->dev, - config->pwr_reg_cnt, - sizeof(hdmi->pwr_regs[0]), - GFP_KERNEL); - if (!hdmi->pwr_regs) { - ret = -ENOMEM; - goto fail; - } - - for (i = 0; i < config->pwr_reg_cnt; i++) - hdmi->pwr_regs[i].supply = config->pwr_reg_names[i]; - - ret = devm_regulator_bulk_get(&pdev->dev, config->pwr_reg_cnt, hdmi->pwr_regs); - if (ret) { - DRM_DEV_ERROR(&pdev->dev, "failed to get pwr regulator: %d\n", ret); - goto fail; - } - - hdmi->hpd_clks = devm_kcalloc(&pdev->dev, - config->hpd_clk_cnt, - sizeof(hdmi->hpd_clks[0]), - GFP_KERNEL); - if (!hdmi->hpd_clks) { - ret = -ENOMEM; - goto fail; - } - for (i = 0; i < config->hpd_clk_cnt; i++) { - struct clk *clk; - - clk = msm_clk_get(pdev, config->hpd_clk_names[i]); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - DRM_DEV_ERROR(&pdev->dev, "failed to get hpd clk: %s (%d)\n", - config->hpd_clk_names[i], ret); - goto fail; - } - - hdmi->hpd_clks[i] = clk; - } - - hdmi->pwr_clks = devm_kcalloc(&pdev->dev, - config->pwr_clk_cnt, - sizeof(hdmi->pwr_clks[0]), - GFP_KERNEL); - if (!hdmi->pwr_clks) { - ret = -ENOMEM; - goto fail; - } - for (i = 0; i < config->pwr_clk_cnt; i++) { - struct clk *clk; - - clk = msm_clk_get(pdev, config->pwr_clk_names[i]); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - DRM_DEV_ERROR(&pdev->dev, "failed to get pwr clk: %s (%d)\n", - config->pwr_clk_names[i], ret); - goto fail; - } - - hdmi->pwr_clks[i] = clk; - } - - hdmi->hpd_gpiod = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN); - /* This will catch e.g. -EPROBE_DEFER */ - if (IS_ERR(hdmi->hpd_gpiod)) { - ret = PTR_ERR(hdmi->hpd_gpiod); - DRM_DEV_ERROR(&pdev->dev, "failed to get hpd gpio: (%d)\n", ret); - goto fail; - } - - if (!hdmi->hpd_gpiod) - DBG("failed to get HPD gpio"); - - if (hdmi->hpd_gpiod) - gpiod_set_consumer_name(hdmi->hpd_gpiod, "HDMI_HPD"); - - pm_runtime_enable(&pdev->dev); + struct platform_device *pdev = hdmi->pdev; + int ret; hdmi->workq = alloc_ordered_workqueue("msm_hdmi", 0); @@ -264,25 +129,18 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) goto fail; } - ret = msm_hdmi_get_phy(hdmi); - if (ret) { - DRM_DEV_ERROR(&pdev->dev, "failed to get phy\n"); - goto fail; - } - hdmi->hdcp_ctrl = msm_hdmi_hdcp_init(hdmi); if (IS_ERR(hdmi->hdcp_ctrl)) { dev_warn(&pdev->dev, "failed to init hdcp: disabled\n"); hdmi->hdcp_ctrl = NULL; } - return hdmi; + return 0; fail: - if (hdmi) - msm_hdmi_destroy(hdmi); + msm_hdmi_destroy(hdmi); - return ERR_PTR(ret); + return ret; } /* Second part of initialization, the drm/kms level modeset_init, @@ -297,9 +155,13 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, struct drm_device *dev, struct drm_encoder *encoder) { struct msm_drm_private *priv = dev->dev_private; - struct platform_device *pdev = hdmi->pdev; int ret; + if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) { + DRM_DEV_ERROR(dev->dev, "too many bridges\n"); + return -ENOSPC; + } + hdmi->dev = dev; hdmi->encoder = encoder; @@ -332,14 +194,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, drm_connector_attach_encoder(hdmi->connector, hdmi->encoder); - hdmi->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (!hdmi->irq) { - ret = -EINVAL; - DRM_DEV_ERROR(dev->dev, "failed to get irq\n"); - goto fail; - } - - ret = devm_request_irq(&pdev->dev, hdmi->irq, + ret = devm_request_irq(dev->dev, hdmi->irq, msm_hdmi_irq, IRQF_TRIGGER_HIGH, "hdmi_isr", hdmi); if (ret < 0) { @@ -358,8 +213,6 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, priv->bridges[priv->num_bridges++] = hdmi->bridge; - platform_set_drvdata(pdev, hdmi); - return 0; fail: @@ -387,7 +240,7 @@ fail: static const char *hpd_reg_names_8960[] = {"core-vdda"}; static const char *hpd_clk_names_8960[] = {"core", "master_iface", "slave_iface"}; -static struct hdmi_platform_config hdmi_tx_8960_config = { +static const struct hdmi_platform_config hdmi_tx_8960_config = { HDMI_CFG(hpd_reg, 8960), HDMI_CFG(hpd_clk, 8960), }; @@ -397,7 +250,7 @@ static const char *pwr_clk_names_8x74[] = {"extp", "alt_iface"}; static const char *hpd_clk_names_8x74[] = {"iface", "core", "mdp_core"}; static unsigned long hpd_clk_freq_8x74[] = {0, 19200000, 0}; -static struct hdmi_platform_config hdmi_tx_8974_config = { +static const struct hdmi_platform_config hdmi_tx_8974_config = { HDMI_CFG(pwr_reg, 8x74), HDMI_CFG(pwr_clk, 8x74), HDMI_CFG(hpd_clk, 8x74), @@ -512,26 +365,12 @@ static int msm_hdmi_register_audio_driver(struct hdmi *hdmi, struct device *dev) static int msm_hdmi_bind(struct device *dev, struct device *master, void *data) { struct msm_drm_private *priv = dev_get_drvdata(master); - struct hdmi_platform_config *hdmi_cfg; - struct hdmi *hdmi; - struct device_node *of_node = dev->of_node; + struct hdmi *hdmi = dev_get_drvdata(dev); int err; - hdmi_cfg = (struct hdmi_platform_config *) - of_device_get_match_data(dev); - if (!hdmi_cfg) { - DRM_DEV_ERROR(dev, "unknown hdmi_cfg: %pOFn\n", of_node); - return -ENXIO; - } - - hdmi_cfg->mmio_name = "core_physical"; - hdmi_cfg->qfprom_mmio_name = "qfprom_physical"; - - dev->platform_data = hdmi_cfg; - - hdmi = msm_hdmi_init(to_platform_device(dev)); - if (IS_ERR(hdmi)) - return PTR_ERR(hdmi); + err = msm_hdmi_init(hdmi); + if (err) + return err; priv->hdmi = hdmi; err = msm_hdmi_register_audio_driver(hdmi, dev); @@ -564,12 +403,150 @@ static const struct component_ops msm_hdmi_ops = { static int msm_hdmi_dev_probe(struct platform_device *pdev) { + const struct hdmi_platform_config *config; + struct device *dev = &pdev->dev; + struct hdmi *hdmi; + struct resource *res; + int i, ret; + + config = of_device_get_match_data(dev); + if (!config) + return -EINVAL; + + hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); + if (!hdmi) + return -ENOMEM; + + hdmi->pdev = pdev; + hdmi->config = config; + spin_lock_init(&hdmi->reg_lock); + + ret = drm_of_find_panel_or_bridge(pdev->dev.of_node, 1, 0, NULL, &hdmi->next_bridge); + if (ret && ret != -ENODEV) + return ret; + + hdmi->mmio = msm_ioremap(pdev, "core_physical"); + if (IS_ERR(hdmi->mmio)) + return PTR_ERR(hdmi->mmio); + + /* HDCP needs physical address of hdmi register */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "core_physical"); + if (!res) + return -EINVAL; + hdmi->mmio_phy_addr = res->start; + + hdmi->qfprom_mmio = msm_ioremap(pdev, "qfprom_physical"); + if (IS_ERR(hdmi->qfprom_mmio)) { + DRM_DEV_INFO(&pdev->dev, "can't find qfprom resource\n"); + hdmi->qfprom_mmio = NULL; + } + + hdmi->irq = platform_get_irq(pdev, 0); + if (hdmi->irq < 0) + return hdmi->irq; + + hdmi->hpd_regs = devm_kcalloc(&pdev->dev, + config->hpd_reg_cnt, + sizeof(hdmi->hpd_regs[0]), + GFP_KERNEL); + if (!hdmi->hpd_regs) + return -ENOMEM; + + for (i = 0; i < config->hpd_reg_cnt; i++) + hdmi->hpd_regs[i].supply = config->hpd_reg_names[i]; + + ret = devm_regulator_bulk_get(&pdev->dev, config->hpd_reg_cnt, hdmi->hpd_regs); + if (ret) + return dev_err_probe(dev, ret, "failed to get hpd regulators\n"); + + hdmi->pwr_regs = devm_kcalloc(&pdev->dev, + config->pwr_reg_cnt, + sizeof(hdmi->pwr_regs[0]), + GFP_KERNEL); + if (!hdmi->pwr_regs) + return -ENOMEM; + + for (i = 0; i < config->pwr_reg_cnt; i++) + hdmi->pwr_regs[i].supply = config->pwr_reg_names[i]; + + ret = devm_regulator_bulk_get(&pdev->dev, config->pwr_reg_cnt, hdmi->pwr_regs); + if (ret) + return dev_err_probe(dev, ret, "failed to get pwr regulators\n"); + + hdmi->hpd_clks = devm_kcalloc(&pdev->dev, + config->hpd_clk_cnt, + sizeof(hdmi->hpd_clks[0]), + GFP_KERNEL); + if (!hdmi->hpd_clks) + return -ENOMEM; + + for (i = 0; i < config->hpd_clk_cnt; i++) { + struct clk *clk; + + clk = msm_clk_get(pdev, config->hpd_clk_names[i]); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), + "failed to get hpd clk: %s\n", + config->hpd_clk_names[i]); + + hdmi->hpd_clks[i] = clk; + } + + hdmi->pwr_clks = devm_kcalloc(&pdev->dev, + config->pwr_clk_cnt, + sizeof(hdmi->pwr_clks[0]), + GFP_KERNEL); + if (!hdmi->pwr_clks) + return -ENOMEM; + + for (i = 0; i < config->pwr_clk_cnt; i++) { + struct clk *clk; + + clk = msm_clk_get(pdev, config->pwr_clk_names[i]); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), + "failed to get pwr clk: %s\n", + config->pwr_clk_names[i]); + + hdmi->pwr_clks[i] = clk; + } + + hdmi->hpd_gpiod = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN); + /* This will catch e.g. -EPROBE_DEFER */ + if (IS_ERR(hdmi->hpd_gpiod)) + return dev_err_probe(dev, PTR_ERR(hdmi->hpd_gpiod), + "failed to get hpd gpio\n"); + + if (!hdmi->hpd_gpiod) + DBG("failed to get HPD gpio"); + + if (hdmi->hpd_gpiod) + gpiod_set_consumer_name(hdmi->hpd_gpiod, "HDMI_HPD"); + + ret = msm_hdmi_get_phy(hdmi); + if (ret) { + DRM_DEV_ERROR(&pdev->dev, "failed to get phy\n"); + return ret; + } + + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + + platform_set_drvdata(pdev, hdmi); + return component_add(&pdev->dev, &msm_hdmi_ops); } static int msm_hdmi_dev_remove(struct platform_device *pdev) { + struct hdmi *hdmi = dev_get_drvdata(&pdev->dev); + component_del(&pdev->dev, &msm_hdmi_ops); + + msm_hdmi_put_phy(hdmi); + return 0; } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h index 04a74381aaf7..e8dbee50637f 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h @@ -86,9 +86,6 @@ struct hdmi { /* platform config data (ie. from DT, or pdata) */ struct hdmi_platform_config { - const char *mmio_name; - const char *qfprom_mmio_name; - /* regulators that need to be on for hpd: */ const char **hpd_reg_names; int hpd_reg_cnt; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 28034c21f6bc..8b0b0ac74a6f 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -247,6 +247,7 @@ static int msm_drm_uninit(struct device *dev) for (i = 0; i < priv->num_bridges; i++) drm_bridge_remove(priv->bridges[i]); + priv->num_bridges = 0; pm_runtime_get_sync(dev); msm_irq_uninstall(ddev); @@ -276,7 +277,6 @@ static int msm_drm_uninit(struct device *dev) struct msm_gem_address_space *msm_kms_init_aspace(struct drm_device *dev) { - struct iommu_domain *domain; struct msm_gem_address_space *aspace; struct msm_mmu *mmu; struct device *mdp_dev = dev->dev; @@ -292,22 +292,21 @@ struct msm_gem_address_space *msm_kms_init_aspace(struct drm_device *dev) else iommu_dev = mdss_dev; - domain = iommu_domain_alloc(iommu_dev->bus); - if (!domain) { + mmu = msm_iommu_new(iommu_dev, 0); + if (IS_ERR(mmu)) + return ERR_CAST(mmu); + + if (!mmu) { drm_info(dev, "no IOMMU, fallback to phys contig buffers for scanout\n"); return NULL; } - mmu = msm_iommu_new(iommu_dev, domain); - if (IS_ERR(mmu)) { - iommu_domain_free(domain); - return ERR_CAST(mmu); - } - aspace = msm_gem_address_space_create(mmu, "mdp_kms", 0x1000, 0x100000000 - 0x1000); - if (IS_ERR(aspace)) + if (IS_ERR(aspace)) { + dev_err(mdp_dev, "aspace create, error %pe\n", aspace); mmu->funcs->destroy(mmu); + } return aspace; } @@ -419,7 +418,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) priv->dev = ddev; priv->wq = alloc_ordered_workqueue("msm", 0); - priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; INIT_LIST_HEAD(&priv->objects); mutex_init(&priv->obj_lock); @@ -818,6 +816,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, case MSM_INFO_GET_OFFSET: case MSM_INFO_GET_IOVA: case MSM_INFO_SET_IOVA: + case MSM_INFO_GET_FLAGS: /* value returned as immediate, not pointer, so len==0: */ if (args->len) return -EINVAL; @@ -845,6 +844,15 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, case MSM_INFO_SET_IOVA: ret = msm_ioctl_gem_info_set_iova(dev, file, obj, args->value); break; + case MSM_INFO_GET_FLAGS: + if (obj->import_attach) { + ret = -EINVAL; + break; + } + /* Hide internal kernel-only flags: */ + args->value = to_msm_bo(obj)->flags & MSM_BO_FLAGS; + ret = 0; + break; case MSM_INFO_SET_NAME: /* length check should leave room for terminating null: */ if (args->len >= sizeof(msm_obj->name)) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b2ea262296a4..d4e0ef608950 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -224,7 +224,13 @@ struct msm_drm_private { struct drm_atomic_state *pm_state; - /* For hang detection, in ms */ + /** + * hangcheck_period: For hang detection, in ms + * + * Note that in practice, a submit/job will get at least two hangcheck + * periods, due to checking for progress being implemented as simply + * "have the CP position registers changed since last time?" + */ unsigned int hangcheck_period; /** diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 1de14e67f96b..051bdbc093cf 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -15,7 +15,7 @@ /* Default disabled for now until it has some more testing on the different * iommu combinations that can be paired with the driver: */ -static bool enable_eviction = false; +static bool enable_eviction = true; MODULE_PARM_DESC(enable_eviction, "Enable swappable GEM buffers"); module_param(enable_eviction, bool, 0600); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5599d93ec0d2..73a2ca122c57 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -334,8 +334,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (ret) return ret; - /* exclusive fences must be ordered */ - if (no_implicit && !write) + if (no_implicit) continue; ret = drm_sched_job_add_implicit_dependencies(&submit->base, @@ -501,11 +500,11 @@ out: */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { - unsigned cleanup_flags = BO_LOCKED | BO_OBJ_PINNED; + unsigned cleanup_flags = BO_LOCKED; unsigned i; if (error) - cleanup_flags |= BO_VMA_PINNED; + cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED; for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -706,7 +705,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_gem_submit *args = data; struct msm_file_private *ctx = file->driver_priv; - struct msm_gem_submit *submit = NULL; + struct msm_gem_submit *submit; struct msm_gpu *gpu = priv->gpu; struct msm_gpu_submitqueue *queue; struct msm_ringbuffer *ring; @@ -946,8 +945,7 @@ out_unlock: put_unused_fd(out_fence_fd); mutex_unlock(&queue->lock); out_post_unlock: - if (submit) - msm_gem_submit_put(submit); + msm_gem_submit_put(submit); if (!IS_ERR_OR_NULL(post_deps)) { for (i = 0; i < args->nr_out_syncobjs; ++i) { kfree(post_deps[i].chain); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0098ee8438aa..30ed45af76ad 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -492,6 +492,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu) round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period))); } +static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES) + return false; + + if (!gpu->funcs->progress) + return false; + + if (!gpu->funcs->progress(gpu, ring)) + return false; + + ring->hangcheck_progress_retries++; + return true; +} + static void hangcheck_handler(struct timer_list *t) { struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer); @@ -502,9 +517,12 @@ static void hangcheck_handler(struct timer_list *t) if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ ring->hangcheck_fence = fence; - } else if (fence_before(fence, ring->fctx->last_fence)) { + ring->hangcheck_progress_retries = 0; + } else if (fence_before(fence, ring->fctx->last_fence) && + !made_progress(gpu, ring)) { /* no progress and not done.. hung! */ ring->hangcheck_fence = fence; + ring->hangcheck_progress_retries = 0; DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", gpu->name, ring->id); DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n", @@ -830,6 +848,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { + struct msm_drm_private *priv = drm->dev_private; int i, ret, nr_rings = config->nr_rings; void *memptrs; uint64_t memptrs_iova; @@ -857,6 +876,16 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); + priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; + + /* + * If progress detection is supported, halve the hangcheck timer + * duration, as it takes two iterations of the hangcheck handler + * to detect a hang. + */ + if (funcs->progress) + priv->hangcheck_period /= 2; + timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0); spin_lock_init(&gpu->perf_lock); @@ -997,4 +1026,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) } msm_devfreq_cleanup(gpu); + + platform_set_drvdata(gpu->pdev, NULL); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index ff911e7305ce..651786bc55e5 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -78,6 +78,15 @@ struct msm_gpu_funcs { struct msm_gem_address_space *(*create_private_address_space) (struct msm_gpu *gpu); uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); + + /** + * progress: Has the GPU made progress? + * + * Return true if GPU position in cmdstream has advanced (or changed) + * since the last call. To avoid false negatives, this should account + * for cmdstream that is buffered in this FIFO upstream of the CP fw. + */ + bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); }; /* Additional state for iommu faults: */ @@ -237,6 +246,7 @@ struct msm_gpu { #define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */ #define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */ +#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3 struct timer_list hangcheck_timer; /* Fault info for most recent iova fault: */ @@ -280,6 +290,10 @@ struct msm_gpu { static inline struct msm_gpu *dev_to_gpu(struct device *dev) { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev); + + if (!adreno_smmu) + return NULL; + return container_of(adreno_smmu, struct msm_gpu, adreno_smmu); } @@ -536,7 +550,7 @@ static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) msm_rmw(gpu->mmio + (reg << 2), mask, or); } -static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi) +static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) { u64 val; @@ -554,17 +568,17 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi) * when the lo is read, so make sure to read the lo first to trigger * that */ - val = (u64) msm_readl(gpu->mmio + (lo << 2)); - val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32); + val = (u64) msm_readl(gpu->mmio + (reg << 2)); + val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32); return val; } -static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) +static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val) { /* Why not a writeq here? Read the screed above */ - msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2)); - msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2)); + msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2)); + msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2)); } int msm_gpu_pm_suspend(struct msm_gpu *gpu); diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 5577cea7c009..c2507582ecf3 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -186,6 +186,13 @@ int msm_iommu_pagetable_params(struct msm_mmu *mmu, return 0; } +struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu) +{ + struct msm_iommu *iommu = to_msm_iommu(mmu); + + return &iommu->domain->geometry; +} + static const struct msm_mmu_funcs pagetable_funcs = { .map = msm_iommu_pagetable_map, .unmap = msm_iommu_pagetable_unmap, @@ -367,17 +374,23 @@ static const struct msm_mmu_funcs funcs = { .resume_translation = msm_iommu_resume_translation, }; -struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) +struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) { + struct iommu_domain *domain; struct msm_iommu *iommu; int ret; + domain = iommu_domain_alloc(dev->bus); if (!domain) - return ERR_PTR(-ENODEV); + return NULL; + + iommu_set_pgtable_quirks(domain, quirks); iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) + if (!iommu) { + iommu_domain_free(domain); return ERR_PTR(-ENOMEM); + } iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); @@ -386,6 +399,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) ret = iommu_attach_device(iommu->domain, dev); if (ret) { + iommu_domain_free(domain); kfree(iommu); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index e13c5c12b775..86b28add1fff 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -22,6 +22,7 @@ #define HW_REV 0x0 #define HW_INTR_STATUS 0x0010 +#define UBWC_DEC_HW_VERSION 0x58 #define UBWC_STATIC 0x144 #define UBWC_CTRL_2 0x150 #define UBWC_PREDICTION_MODE 0x154 @@ -174,9 +175,63 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) return 0; } +#define UBWC_1_0 0x10000000 +#define UBWC_2_0 0x20000000 +#define UBWC_3_0 0x30000000 +#define UBWC_4_0 0x40000000 + +static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss, + u32 ubwc_static) +{ + writel_relaxed(ubwc_static, msm_mdss->mmio + UBWC_STATIC); +} + +static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss, + unsigned int ubwc_version, + u32 ubwc_swizzle, + u32 highest_bank_bit, + u32 macrotile_mode) +{ + u32 value = (ubwc_swizzle & 0x1) | + (highest_bank_bit & 0x3) << 4 | + (macrotile_mode & 0x1) << 12; + + if (ubwc_version == UBWC_3_0) + value |= BIT(10); + + if (ubwc_version == UBWC_1_0) + value |= BIT(8); + + writel_relaxed(value, msm_mdss->mmio + UBWC_STATIC); +} + +static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss, + unsigned int ubwc_version, + u32 ubwc_swizzle, + u32 ubwc_static, + u32 highest_bank_bit, + u32 macrotile_mode) +{ + u32 value = (ubwc_swizzle & 0x7) | + (ubwc_static & 0x1) << 3 | + (highest_bank_bit & 0x7) << 4 | + (macrotile_mode & 0x1) << 12; + + writel_relaxed(value, msm_mdss->mmio + UBWC_STATIC); + + if (ubwc_version == UBWC_3_0) { + writel_relaxed(1, msm_mdss->mmio + UBWC_CTRL_2); + writel_relaxed(0, msm_mdss->mmio + UBWC_PREDICTION_MODE); + } else { + writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2); + writel_relaxed(1, msm_mdss->mmio + UBWC_PREDICTION_MODE); + } +} + static int msm_mdss_enable(struct msm_mdss *msm_mdss) { int ret; + u32 hw_rev; /* * Several components have AXI clocks that can only be turned on if @@ -198,26 +253,39 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) if (msm_mdss->is_mdp5) return 0; + hw_rev = readl_relaxed(msm_mdss->mmio + HW_REV); + dev_dbg(msm_mdss->dev, "HW_REV: 0x%x\n", hw_rev); + dev_dbg(msm_mdss->dev, "UBWC_DEC_HW_VERSION: 0x%x\n", + readl_relaxed(msm_mdss->mmio + UBWC_DEC_HW_VERSION)); + /* * ubwc config is part of the "mdss" region which is not accessible * from the rest of the driver. hardcode known configurations here + * + * Decoder version can be read from the UBWC_DEC_HW_VERSION reg, + * UBWC_n and the rest of params comes from hw_catalog. + * Unforunately this driver can not access hw catalog, so we have to + * hardcode them here. */ - switch (readl_relaxed(msm_mdss->mmio + HW_REV)) { + switch (hw_rev) { case DPU_HW_VER_500: case DPU_HW_VER_501: - writel_relaxed(0x420, msm_mdss->mmio + UBWC_STATIC); + msm_mdss_setup_ubwc_dec_30(msm_mdss, UBWC_3_0, 0, 2, 0); break; case DPU_HW_VER_600: - /* TODO: 0x102e for LP_DDR4 */ - writel_relaxed(0x103e, msm_mdss->mmio + UBWC_STATIC); - writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2); - writel_relaxed(1, msm_mdss->mmio + UBWC_PREDICTION_MODE); + /* TODO: highest_bank_bit = 2 for LP_DDR4 */ + msm_mdss_setup_ubwc_dec_40(msm_mdss, UBWC_4_0, 6, 1, 3, 1); break; case DPU_HW_VER_620: - writel_relaxed(0x1e, msm_mdss->mmio + UBWC_STATIC); + /* UBWC_2_0 */ + msm_mdss_setup_ubwc_dec_20(msm_mdss, 0x1e); + break; + case DPU_HW_VER_630: + /* UBWC_2_0 */ + msm_mdss_setup_ubwc_dec_20(msm_mdss, 0x11f); break; case DPU_HW_VER_720: - writel_relaxed(0x101e, msm_mdss->mmio + UBWC_STATIC); + msm_mdss_setup_ubwc_dec_40(msm_mdss, UBWC_3_0, 6, 1, 1, 1); break; } @@ -445,6 +513,7 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc8180x-mdss" }, + { .compatible = "qcom,sm6115-mdss" }, { .compatible = "qcom,sm8150-mdss" }, { .compatible = "qcom,sm8250-mdss" }, {} diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index de158e1bf765..74cd81e701ff 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -40,7 +40,7 @@ static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev, mmu->type = type; } -struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain); +struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks); struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu); static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg, @@ -58,5 +58,6 @@ void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base, int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr, int *asid); +struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu); #endif /* __MSM_MMU_H__ */ diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index cad4c3525f0b..57a8e9564540 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -25,7 +25,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_gem_lock(obj); msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx); - submit->bos[i].flags &= ~BO_VMA_PINNED; + msm_gem_unpin_locked(obj); + submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED); msm_gem_unlock(obj); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 2a5045abe46e..698b333abccd 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -35,6 +35,11 @@ struct msm_rbmemptrs { volatile u64 ttbr0; }; +struct msm_cp_state { + uint64_t ib1_base, ib2_base; + uint32_t ib1_rem, ib2_rem; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; int id; @@ -64,6 +69,29 @@ struct msm_ringbuffer { uint64_t memptrs_iova; struct msm_fence_context *fctx; + /** + * hangcheck_progress_retries: + * + * The number of extra hangcheck duration cycles that we have given + * due to it appearing that the GPU is making forward progress. + * + * For GPU generations which support progress detection (see. + * msm_gpu_funcs::progress()), if the GPU appears to be making progress + * (ie. the CP has advanced in the command stream, we'll allow up to + * DRM_MSM_HANGCHECK_PROGRESS_RETRIES expirations of the hangcheck timer + * before killing the job. But to detect progress we need two sample + * points, so the duration of the hangcheck timer is halved. In other + * words we'll let the submit run for up to: + * + * (DRM_MSM_HANGCHECK_DEFAULT_PERIOD / 2) * (DRM_MSM_HANGCHECK_PROGRESS_RETRIES + 1) + */ + int hangcheck_progress_retries; + + /** + * last_cp_state: The state of the CP at the last call to gpu->progress() + */ + struct msm_cp_state last_cp_state; + /* * preempt_lock protects preemption and serializes wptr updates against * preemption. Can be aquired from irq context. diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 2944228a8e2c..8a3b685c2fcc 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2500,6 +2500,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = { static const struct panel_desc logictechno_lt161010_2nh = { .timings = &logictechno_lt161010_2nh_timing, .num_timings = 1, + .bpc = 6, .size = { .width = 154, .height = 86, @@ -2529,6 +2530,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = { static const struct panel_desc logictechno_lt170410_2whc = { .timings = &logictechno_lt170410_2whc_timing, .num_timings = 1, + .bpc = 8, .size = { .width = 217, .height = 136, diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index e246d914e7f6..4e83a1891f3e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -250,13 +250,22 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev) static size_t get_pgsize(u64 addr, size_t size, size_t *count) { + /* + * io-pgtable only operates on multiple pages within a single table + * entry, so we need to split at boundaries of the table size, i.e. + * the next block size up. The distance from address A to the next + * boundary of block size B is logically B - A % B, but in unsigned + * two's complement where B is a power of two we get the equivalence + * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :) + */ size_t blk_offset = -addr % SZ_2M; if (blk_offset || size < SZ_2M) { *count = min_not_zero(blk_offset, size) / SZ_4K; return SZ_4K; } - *count = size / SZ_2M; + blk_offset = -addr % SZ_1G ?: SZ_1G; + *count = min(blk_offset, size) / SZ_2M; return SZ_2M; } diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 1d99c9a2b56e..63bdc9f6fc24 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) if (!found) return false; + pci_dev_put(pdev); rdev->bios = kmalloc(size, GFP_KERNEL); if (!rdev->bios) { diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index f14686549cbe..b2bddbeca878 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -42,16 +42,21 @@ config DRM_RCAR_LVDS select DRM_KMS_HELPER select DRM_PANEL -config DRM_RCAR_MIPI_DSI - tristate "R-Car DU MIPI DSI Encoder Support" - depends on DRM && DRM_BRIDGE && OF - select DRM_MIPI_DSI +config DRM_RCAR_USE_MIPI_DSI + bool "R-Car DU MIPI DSI Encoder Support" + depends on DRM_BRIDGE && OF + default DRM_RCAR_DU help Enable support for the R-Car Display Unit embedded MIPI DSI encoders. +config DRM_RCAR_MIPI_DSI + def_tristate DRM_RCAR_DU + depends on DRM_RCAR_USE_MIPI_DSI + select DRM_MIPI_DSI + config DRM_RZG2L_MIPI_DSI tristate "RZ/G2L MIPI DSI Encoder Support" - depends on DRM_BRIDGE && OF + depends on DRM && DRM_BRIDGE && OF depends on ARCH_RENESAS || COMPILE_TEST select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 92b599b089f9..7901c3babc8c 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -746,7 +746,7 @@ static void dw_mipi_dsi_rockchip_config(struct dw_mipi_dsi_rockchip *dsi) static void dw_mipi_dsi_rockchip_set_lcdsel(struct dw_mipi_dsi_rockchip *dsi, int mux) { - if (dsi->cdata->lcdsel_grf_reg < 0) + if (dsi->cdata->lcdsel_grf_reg) regmap_write(dsi->grf_regmap, dsi->cdata->lcdsel_grf_reg, mux ? dsi->cdata->lcdsel_lit : dsi->cdata->lcdsel_big); } @@ -1045,23 +1045,31 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data, if (ret) { DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n", ret); - return ret; + goto out; } second = dw_mipi_dsi_rockchip_find_second(dsi); - if (IS_ERR(second)) - return PTR_ERR(second); + if (IS_ERR(second)) { + ret = PTR_ERR(second); + goto out; + } if (second) { ret = component_add(second, &dw_mipi_dsi_rockchip_ops); if (ret) { DRM_DEV_ERROR(second, "Failed to register component: %d\n", ret); - return ret; + goto out; } } return 0; + +out: + mutex_lock(&dsi->usage_mutex); + dsi->usage_mode = DW_DSI_USAGE_IDLE; + mutex_unlock(&dsi->usage_mutex); + return ret; } static int dw_mipi_dsi_rockchip_host_detach(void *priv_data, @@ -1629,7 +1637,6 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { { .reg = 0xfe060000, - .lcdsel_grf_reg = -1, .lanecfg1_grf_reg = RK3568_GRF_VO_CON2, .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS | RK3568_DSI0_FORCETXSTOPMODE | @@ -1639,7 +1646,6 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { }, { .reg = 0xfe070000, - .lcdsel_grf_reg = -1, .lanecfg1_grf_reg = RK3568_GRF_VO_CON3, .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS | RK3568_DSI1_FORCETXSTOPMODE | @@ -1675,5 +1681,11 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", + /* + * For dual-DSI display, one DSI pokes at the other DSI's + * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not + * safe for asynchronous probe. + */ + .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index c14f88893868..2f4b8f64cbad 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -565,7 +565,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, ret = rockchip_hdmi_parse_dt(hdmi); if (ret) { - DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n"); + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n"); return ret; } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index bf1120e0f573..6edb7c52cb3d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -365,9 +365,12 @@ rockchip_gem_create_with_handle(struct drm_file *file_priv, { struct rockchip_gem_object *rk_obj; struct drm_gem_object *obj; + bool is_framebuffer; int ret; - rk_obj = rockchip_gem_create_object(drm, size, false); + is_framebuffer = drm->fb_helper && file_priv == drm->fb_helper->client.file; + + rk_obj = rockchip_gem_create_object(drm, size, is_framebuffer); if (IS_ERR(rk_obj)) return ERR_CAST(rk_obj); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index ad87db2fcaf6..8cecf81a5ae0 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -877,10 +877,14 @@ static void vop2_crtc_atomic_disable(struct drm_crtc *crtc, { struct vop2_video_port *vp = to_vop2_video_port(crtc); struct vop2 *vop2 = vp->vop2; + struct drm_crtc_state *old_crtc_state; int ret; vop2_lock(vop2); + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, false); + drm_crtc_vblank_off(crtc); /* @@ -996,13 +1000,15 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, static void vop2_plane_atomic_disable(struct drm_plane *plane, struct drm_atomic_state *state) { - struct drm_plane_state *old_pstate = drm_atomic_get_old_plane_state(state, plane); + struct drm_plane_state *old_pstate = NULL; struct vop2_win *win = to_vop2_win(plane); struct vop2 *vop2 = win->vop2; drm_dbg(vop2->drm, "%s disable\n", win->data->name); - if (!old_pstate->crtc) + if (state) + old_pstate = drm_atomic_get_old_plane_state(state, plane); + if (old_pstate && !old_pstate->crtc) return; vop2_win_disable(win); diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c index f3a5616b7daf..577c477b5f46 100644 --- a/drivers/gpu/drm/sti/sti_dvo.c +++ b/drivers/gpu/drm/sti/sti_dvo.c @@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector) #define CLK_TOLERANCE_HZ 50 -static int sti_dvo_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_dvo_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index ec6656b9ee7c..15097ac67931 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -601,8 +601,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector) #define CLK_TOLERANCE_HZ 50 -static int sti_hda_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_hda_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index fcc2194869d6..8539fe1fedc4 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1004,8 +1004,9 @@ fail: #define CLK_TOLERANCE_HZ 50 -static int sti_hdmi_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_hdmi_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index df6cc986aeba..bb0d2c144b55 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -24,7 +24,8 @@ tegra-drm-y := \ gr3d.o \ falcon.o \ vic.o \ - nvdec.o + nvdec.o \ + riscv.o tegra-drm-y += trace.o diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index bd0f60704467..a67453cee883 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -3205,8 +3205,10 @@ static int tegra_dc_probe(struct platform_device *pdev) usleep_range(2000, 4000); err = reset_control_assert(dc->rst); - if (err < 0) + if (err < 0) { + clk_disable_unprepare(dc->clk); return err; + } usleep_range(2000, 4000); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 6748ec1e0005..7bd2e65c2a16 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1093,6 +1093,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) struct host1x *host1x = dev_get_drvdata(dev->dev.parent); struct iommu_domain *domain; + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If the Tegra DRM clients are backed by an IOMMU, push buffers are * likely to be allocated beyond the 32-bit boundary if sufficient @@ -1382,6 +1386,7 @@ static const struct of_device_id host1x_drm_subdevs[] = { { .compatible = "nvidia,tegra194-vic", }, { .compatible = "nvidia,tegra194-nvdec", }, { .compatible = "nvidia,tegra234-vic", }, + { .compatible = "nvidia,tegra234-nvdec", }, { /* sentinel */ } }; diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index bf240767dad9..40ec3e6cf204 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -867,14 +867,7 @@ static int tegra_hdmi_reconfigure_audio(struct tegra_hdmi *hdmi) static bool tegra_output_is_hdmi(struct tegra_output *output) { - struct edid *edid; - - if (!output->connector.edid_blob_ptr) - return false; - - edid = (struct edid *)output->connector.edid_blob_ptr->data; - - return drm_detect_hdmi_monitor(edid); + return output->connector.display_info.is_hdmi; } static enum drm_connector_status diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 276fe0472730..10fd21517281 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015-2021, NVIDIA Corporation. + * Copyright (c) 2015-2022, NVIDIA Corporation. */ #include <linux/clk.h> @@ -8,6 +8,7 @@ #include <linux/dma-mapping.h> #include <linux/host1x.h> #include <linux/iommu.h> +#include <linux/iopoll.h> #include <linux/module.h> #include <linux/of.h> #include <linux/of_device.h> @@ -16,18 +17,22 @@ #include <linux/pm_runtime.h> #include <linux/reset.h> -#include <soc/tegra/pmc.h> +#include <soc/tegra/mc.h> #include "drm.h" #include "falcon.h" +#include "riscv.h" #include "vic.h" +#define NVDEC_FALCON_DEBUGINFO 0x1094 #define NVDEC_TFBIF_TRANSCFG 0x2c44 struct nvdec_config { const char *firmware; unsigned int version; bool supports_sid; + bool has_riscv; + bool has_extra_clocks; }; struct nvdec { @@ -37,10 +42,16 @@ struct nvdec { struct tegra_drm_client client; struct host1x_channel *channel; struct device *dev; - struct clk *clk; + struct clk_bulk_data clks[3]; + unsigned int num_clks; + struct reset_control *reset; /* Platform configuration */ const struct nvdec_config *config; + + /* RISC-V specific data */ + struct tegra_drm_riscv riscv; + phys_addr_t carveout_base; }; static inline struct nvdec *to_nvdec(struct tegra_drm_client *client) @@ -54,7 +65,7 @@ static inline void nvdec_writel(struct nvdec *nvdec, u32 value, writel(value, nvdec->regs + offset); } -static int nvdec_boot(struct nvdec *nvdec) +static int nvdec_boot_falcon(struct nvdec *nvdec) { #ifdef CONFIG_IOMMU_API struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev); @@ -90,6 +101,64 @@ static int nvdec_boot(struct nvdec *nvdec) return 0; } +static int nvdec_wait_debuginfo(struct nvdec *nvdec, const char *phase) +{ + int err; + u32 val; + + err = readl_poll_timeout(nvdec->regs + NVDEC_FALCON_DEBUGINFO, val, val == 0x0, 10, 100000); + if (err) { + dev_err(nvdec->dev, "failed to boot %s, debuginfo=0x%x\n", phase, val); + return err; + } + + return 0; +} + +static int nvdec_boot_riscv(struct nvdec *nvdec) +{ + int err; + + err = reset_control_acquire(nvdec->reset); + if (err) + return err; + + nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO); + + err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1, + &nvdec->riscv.bl_desc); + if (err) { + dev_err(nvdec->dev, "failed to execute bootloader\n"); + goto release_reset; + } + + err = nvdec_wait_debuginfo(nvdec, "bootloader"); + if (err) + goto release_reset; + + err = reset_control_reset(nvdec->reset); + if (err) + goto release_reset; + + nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO); + + err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1, + &nvdec->riscv.os_desc); + if (err) { + dev_err(nvdec->dev, "failed to execute firmware\n"); + goto release_reset; + } + + err = nvdec_wait_debuginfo(nvdec, "firmware"); + if (err) + goto release_reset; + +release_reset: + reset_control_release(nvdec->reset); + + return err; +} + static int nvdec_init(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); @@ -189,7 +258,7 @@ static const struct host1x_client_ops nvdec_client_ops = { .exit = nvdec_exit, }; -static int nvdec_load_firmware(struct nvdec *nvdec) +static int nvdec_load_falcon_firmware(struct nvdec *nvdec) { struct host1x_client *client = &nvdec->client.base; struct tegra_drm *tegra = nvdec->client.drm; @@ -252,30 +321,35 @@ cleanup: return err; } - static __maybe_unused int nvdec_runtime_resume(struct device *dev) { struct nvdec *nvdec = dev_get_drvdata(dev); int err; - err = clk_prepare_enable(nvdec->clk); + err = clk_bulk_prepare_enable(nvdec->num_clks, nvdec->clks); if (err < 0) return err; usleep_range(10, 20); - err = nvdec_load_firmware(nvdec); - if (err < 0) - goto disable; + if (nvdec->config->has_riscv) { + err = nvdec_boot_riscv(nvdec); + if (err < 0) + goto disable; + } else { + err = nvdec_load_falcon_firmware(nvdec); + if (err < 0) + goto disable; - err = nvdec_boot(nvdec); - if (err < 0) - goto disable; + err = nvdec_boot_falcon(nvdec); + if (err < 0) + goto disable; + } return 0; disable: - clk_disable_unprepare(nvdec->clk); + clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks); return err; } @@ -285,7 +359,7 @@ static __maybe_unused int nvdec_runtime_suspend(struct device *dev) host1x_channel_stop(nvdec->channel); - clk_disable_unprepare(nvdec->clk); + clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks); return 0; } @@ -346,10 +420,18 @@ static const struct nvdec_config nvdec_t194_config = { .supports_sid = true, }; +static const struct nvdec_config nvdec_t234_config = { + .version = 0x23, + .supports_sid = true, + .has_riscv = true, + .has_extra_clocks = true, +}; + static const struct of_device_id tegra_nvdec_of_match[] = { { .compatible = "nvidia,tegra210-nvdec", .data = &nvdec_t210_config }, { .compatible = "nvidia,tegra186-nvdec", .data = &nvdec_t186_config }, { .compatible = "nvidia,tegra194-nvdec", .data = &nvdec_t194_config }, + { .compatible = "nvidia,tegra234-nvdec", .data = &nvdec_t234_config }, { }, }; MODULE_DEVICE_TABLE(of, tegra_nvdec_of_match); @@ -383,13 +465,22 @@ static int nvdec_probe(struct platform_device *pdev) if (IS_ERR(nvdec->regs)) return PTR_ERR(nvdec->regs); - nvdec->clk = devm_clk_get(dev, NULL); - if (IS_ERR(nvdec->clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - return PTR_ERR(nvdec->clk); + nvdec->clks[0].id = "nvdec"; + nvdec->num_clks = 1; + + if (nvdec->config->has_extra_clocks) { + nvdec->num_clks = 3; + nvdec->clks[1].id = "fuse"; + nvdec->clks[2].id = "tsec_pka"; } - err = clk_set_rate(nvdec->clk, ULONG_MAX); + err = devm_clk_bulk_get(dev, nvdec->num_clks, nvdec->clks); + if (err) { + dev_err(&pdev->dev, "failed to get clock(s)\n"); + return err; + } + + err = clk_set_rate(nvdec->clks[0].clk, ULONG_MAX); if (err < 0) { dev_err(&pdev->dev, "failed to set clock rate\n"); return err; @@ -399,12 +490,42 @@ static int nvdec_probe(struct platform_device *pdev) if (err < 0) host_class = HOST1X_CLASS_NVDEC; - nvdec->falcon.dev = dev; - nvdec->falcon.regs = nvdec->regs; + if (nvdec->config->has_riscv) { + struct tegra_mc *mc; - err = falcon_init(&nvdec->falcon); - if (err < 0) - return err; + mc = devm_tegra_memory_controller_get(dev); + if (IS_ERR(mc)) { + dev_err_probe(dev, PTR_ERR(mc), + "failed to get memory controller handle\n"); + return PTR_ERR(mc); + } + + err = tegra_mc_get_carveout_info(mc, 1, &nvdec->carveout_base, NULL); + if (err) { + dev_err(dev, "failed to get carveout info: %d\n", err); + return err; + } + + nvdec->reset = devm_reset_control_get_exclusive_released(dev, "nvdec"); + if (IS_ERR(nvdec->reset)) { + dev_err_probe(dev, PTR_ERR(nvdec->reset), "failed to get reset\n"); + return PTR_ERR(nvdec->reset); + } + + nvdec->riscv.dev = dev; + nvdec->riscv.regs = nvdec->regs; + + err = tegra_drm_riscv_read_descriptors(&nvdec->riscv); + if (err < 0) + return err; + } else { + nvdec->falcon.dev = dev; + nvdec->falcon.regs = nvdec->regs; + + err = falcon_init(&nvdec->falcon); + if (err < 0) + return err; + } platform_set_drvdata(pdev, nvdec); diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 47d26b5d9945..a8925dcd7edd 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -133,11 +133,11 @@ int tegra_output_probe(struct tegra_output *output) } } - output->hpd_gpio = devm_gpiod_get_from_of_node(output->dev, - output->of_node, - "nvidia,hpd-gpio", 0, - GPIOD_IN, - "HDMI hotplug detect"); + output->hpd_gpio = devm_fwnode_gpiod_get(output->dev, + of_fwnode_handle(output->of_node), + "nvidia,hpd", + GPIOD_IN, + "HDMI hotplug detect"); if (IS_ERR(output->hpd_gpio)) { if (PTR_ERR(output->hpd_gpio) != -ENOENT) return PTR_ERR(output->hpd_gpio); diff --git a/drivers/gpu/drm/tegra/riscv.c b/drivers/gpu/drm/tegra/riscv.c new file mode 100644 index 000000000000..6580416408f8 --- /dev/null +++ b/drivers/gpu/drm/tegra/riscv.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, NVIDIA Corporation. + */ + +#include <linux/dev_printk.h> +#include <linux/device.h> +#include <linux/iopoll.h> +#include <linux/of.h> + +#include "riscv.h" + +#define RISCV_CPUCTL 0x4388 +#define RISCV_CPUCTL_STARTCPU_TRUE (1 << 0) +#define RISCV_BR_RETCODE 0x465c +#define RISCV_BR_RETCODE_RESULT_V(x) ((x) & 0x3) +#define RISCV_BR_RETCODE_RESULT_PASS_V 3 +#define RISCV_BCR_CTRL 0x4668 +#define RISCV_BCR_CTRL_CORE_SELECT_RISCV (1 << 4) +#define RISCV_BCR_DMACFG 0x466c +#define RISCV_BCR_DMACFG_TARGET_LOCAL_FB (0 << 0) +#define RISCV_BCR_DMACFG_LOCK_LOCKED (1 << 31) +#define RISCV_BCR_DMAADDR_PKCPARAM_LO 0x4670 +#define RISCV_BCR_DMAADDR_PKCPARAM_HI 0x4674 +#define RISCV_BCR_DMAADDR_FMCCODE_LO 0x4678 +#define RISCV_BCR_DMAADDR_FMCCODE_HI 0x467c +#define RISCV_BCR_DMAADDR_FMCDATA_LO 0x4680 +#define RISCV_BCR_DMAADDR_FMCDATA_HI 0x4684 +#define RISCV_BCR_DMACFG_SEC 0x4694 +#define RISCV_BCR_DMACFG_SEC_GSCID(v) ((v) << 16) + +static void riscv_writel(struct tegra_drm_riscv *riscv, u32 value, u32 offset) +{ + writel(value, riscv->regs + offset); +} + +int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv) +{ + struct tegra_drm_riscv_descriptor *bl = &riscv->bl_desc; + struct tegra_drm_riscv_descriptor *os = &riscv->os_desc; + const struct device_node *np = riscv->dev->of_node; + int err; + +#define READ_PROP(name, location) \ + err = of_property_read_u32(np, name, location); \ + if (err) { \ + dev_err(riscv->dev, "failed to read " name ": %d\n", err); \ + return err; \ + } + + READ_PROP("nvidia,bl-manifest-offset", &bl->manifest_offset); + READ_PROP("nvidia,bl-code-offset", &bl->code_offset); + READ_PROP("nvidia,bl-data-offset", &bl->data_offset); + READ_PROP("nvidia,os-manifest-offset", &os->manifest_offset); + READ_PROP("nvidia,os-code-offset", &os->code_offset); + READ_PROP("nvidia,os-data-offset", &os->data_offset); +#undef READ_PROP + + if (bl->manifest_offset == 0 && bl->code_offset == 0 && + bl->data_offset == 0 && os->manifest_offset == 0 && + os->code_offset == 0 && os->data_offset == 0) { + dev_err(riscv->dev, "descriptors not available\n"); + return -EINVAL; + } + + return 0; +} + +int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t image_address, + u32 gscid, const struct tegra_drm_riscv_descriptor *desc) +{ + phys_addr_t addr; + int err; + u32 val; + + riscv_writel(riscv, RISCV_BCR_CTRL_CORE_SELECT_RISCV, RISCV_BCR_CTRL); + + addr = image_address + desc->manifest_offset; + riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_PKCPARAM_LO); + riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_PKCPARAM_HI); + + addr = image_address + desc->code_offset; + riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCCODE_LO); + riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCCODE_HI); + + addr = image_address + desc->data_offset; + riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCDATA_LO); + riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCDATA_HI); + + riscv_writel(riscv, RISCV_BCR_DMACFG_SEC_GSCID(gscid), RISCV_BCR_DMACFG_SEC); + riscv_writel(riscv, + RISCV_BCR_DMACFG_TARGET_LOCAL_FB | RISCV_BCR_DMACFG_LOCK_LOCKED, RISCV_BCR_DMACFG); + + riscv_writel(riscv, RISCV_CPUCTL_STARTCPU_TRUE, RISCV_CPUCTL); + + err = readl_poll_timeout( + riscv->regs + RISCV_BR_RETCODE, val, + RISCV_BR_RETCODE_RESULT_V(val) == RISCV_BR_RETCODE_RESULT_PASS_V, + 10, 100000); + if (err) { + dev_err(riscv->dev, "error during bootrom execution. BR_RETCODE=%d\n", val); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/tegra/riscv.h b/drivers/gpu/drm/tegra/riscv.h new file mode 100644 index 000000000000..bbeb2db078b6 --- /dev/null +++ b/drivers/gpu/drm/tegra/riscv.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, NVIDIA Corporation. + */ + +#ifndef DRM_TEGRA_RISCV_H +#define DRM_TEGRA_RISCV_H + +struct tegra_drm_riscv_descriptor { + u32 manifest_offset; + u32 code_offset; + u32 code_size; + u32 data_offset; + u32 data_size; +}; + +struct tegra_drm_riscv { + /* User initializes */ + struct device *dev; + void __iomem *regs; + + struct tegra_drm_riscv_descriptor bl_desc; + struct tegra_drm_riscv_descriptor os_desc; +}; + +int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv); +int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t image_address, + u32 gscid, const struct tegra_drm_riscv_descriptor *desc); + +#endif diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index b24738bdf3df..066f88564169 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -133,7 +133,7 @@ static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr) { } -const struct host1x_bo_ops gather_bo_ops = { +static const struct host1x_bo_ops gather_bo_ops = { .get = gather_bo_get, .put = gather_bo_put, .pin = gather_bo_pin, @@ -169,14 +169,9 @@ static void *alloc_copy_user_array(void __user *from, size_t count, size_t size) if (copy_len > 0x4000) return ERR_PTR(-E2BIG); - data = kvmalloc(copy_len, GFP_KERNEL); - if (!data) - return ERR_PTR(-ENOMEM); - - if (copy_from_user(data, from, copy_len)) { - kvfree(data); - return ERR_PTR(-EFAULT); - } + data = vmemdup_user(from, copy_len); + if (IS_ERR(data)) + return ERR_CAST(data); return data; } diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index a98239cb0e29..5adab6b22916 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -116,7 +116,7 @@ int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, struct drm_ if (supported) context->memory_context = host1x_memory_context_alloc( - host, get_task_pid(current, PIDTYPE_TGID)); + host, client->base.dev, get_task_pid(current, PIDTYPE_TGID)); if (IS_ERR(context->memory_context)) { if (PTR_ERR(context->memory_context) != -EOPNOTSUPP) { diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index 2191e57f2297..567c71f95edc 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -315,7 +315,7 @@ static void drm_test_fb_xrgb8888_to_gray8(struct kunit *test) iosys_map_set_vaddr(&src, xrgb8888); drm_fb_xrgb8888_to_gray8(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip); - KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0); + KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test) @@ -345,7 +345,7 @@ static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test) iosys_map_set_vaddr(&src, xrgb8888); drm_fb_xrgb8888_to_rgb332(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip); - KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0); + KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) @@ -375,10 +375,10 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) iosys_map_set_vaddr(&src, xrgb8888); drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip, false); - KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0); + KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip, true); - KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected_swab, dst_size), 0); + KUNIT_EXPECT_MEMEQ(test, buf, result->expected_swab, dst_size); } static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) @@ -408,7 +408,7 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) iosys_map_set_vaddr(&src, xrgb8888); drm_fb_xrgb8888_to_rgb888(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip); - KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0); + KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) @@ -439,7 +439,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) drm_fb_xrgb8888_to_xrgb2101010(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip); buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); - KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0); + KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } static struct kunit_case drm_format_helper_test_cases[] = { diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index b66bf7aea632..5990d8f8c363 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -476,7 +476,12 @@ static int __init vc4_drm_register(void) if (ret) return ret; - return platform_driver_register(&vc4_platform_driver); + ret = platform_driver_register(&vc4_platform_driver); + if (ret) + platform_unregister_drivers(component_drivers, + ARRAY_SIZE(component_drivers)); + + return ret; } static void __exit vc4_drm_unregister(void) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 6b223a5fcf6f..12a00d644b61 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -349,27 +349,40 @@ static int vc4_hdmi_reset_link(struct drm_connector *connector, return 0; vc4_hdmi = connector_to_vc4_hdmi(connector); - if (!vc4_hdmi_supports_scrambling(vc4_hdmi)) + mutex_lock(&vc4_hdmi->mutex); + + if (!vc4_hdmi_supports_scrambling(vc4_hdmi)) { + mutex_unlock(&vc4_hdmi->mutex); return 0; + } scrambling_needed = vc4_hdmi_mode_needs_scrambling(&vc4_hdmi->saved_adjusted_mode, vc4_hdmi->output_bpc, vc4_hdmi->output_format); - if (!scrambling_needed) + if (!scrambling_needed) { + mutex_unlock(&vc4_hdmi->mutex); return 0; + } if (conn_state->commit && - !try_wait_for_completion(&conn_state->commit->hw_done)) + !try_wait_for_completion(&conn_state->commit->hw_done)) { + mutex_unlock(&vc4_hdmi->mutex); return 0; + } ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, &config); if (ret < 0) { drm_err(drm, "Failed to read TMDS config: %d\n", ret); + mutex_unlock(&vc4_hdmi->mutex); return 0; } - if (!!(config & SCDC_SCRAMBLING_ENABLE) == scrambling_needed) + if (!!(config & SCDC_SCRAMBLING_ENABLE) == scrambling_needed) { + mutex_unlock(&vc4_hdmi->mutex); return 0; + } + + mutex_unlock(&vc4_hdmi->mutex); /* * HDMI 2.0 says that one should not send scrambled data @@ -397,9 +410,8 @@ static void vc4_hdmi_handle_hotplug(struct vc4_hdmi *vc4_hdmi, * .adap_enable, which leads to that funtion being called with * our mutex held. * - * A similar situation occurs with - * drm_atomic_helper_connector_hdmi_reset_link() that will call - * into our KMS hooks if the scrambling was enabled. + * A similar situation occurs with vc4_hdmi_reset_link() that + * will call into our KMS hooks if the scrambling was enabled. * * Concurrency isn't an issue at the moment since we don't share * any state with any of the other frameworks so we can ignore @@ -3169,9 +3181,16 @@ static int vc4_hdmi_init_resources(struct drm_device *drm, DRM_ERROR("Failed to get HDMI state machine clock\n"); return PTR_ERR(vc4_hdmi->hsm_clock); } + vc4_hdmi->audio_clock = vc4_hdmi->hsm_clock; vc4_hdmi->cec_clock = vc4_hdmi->hsm_clock; + vc4_hdmi->hsm_rpm_clock = devm_clk_get(dev, "hdmi"); + if (IS_ERR(vc4_hdmi->hsm_rpm_clock)) { + DRM_ERROR("Failed to get HDMI state machine clock\n"); + return PTR_ERR(vc4_hdmi->hsm_rpm_clock); + } + return 0; } @@ -3254,6 +3273,12 @@ static int vc5_hdmi_init_resources(struct drm_device *drm, return PTR_ERR(vc4_hdmi->hsm_clock); } + vc4_hdmi->hsm_rpm_clock = devm_clk_get(dev, "hdmi"); + if (IS_ERR(vc4_hdmi->hsm_rpm_clock)) { + DRM_ERROR("Failed to get HDMI state machine clock\n"); + return PTR_ERR(vc4_hdmi->hsm_rpm_clock); + } + vc4_hdmi->pixel_bvb_clock = devm_clk_get(dev, "bvb"); if (IS_ERR(vc4_hdmi->pixel_bvb_clock)) { DRM_ERROR("Failed to get pixel bvb clock\n"); @@ -3317,7 +3342,7 @@ static int vc4_hdmi_runtime_suspend(struct device *dev) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - clk_disable_unprepare(vc4_hdmi->hsm_clock); + clk_disable_unprepare(vc4_hdmi->hsm_rpm_clock); return 0; } @@ -3335,11 +3360,11 @@ static int vc4_hdmi_runtime_resume(struct device *dev) * its frequency while the power domain is active so that it * keeps its rate. */ - ret = clk_set_min_rate(vc4_hdmi->hsm_clock, HSM_MIN_CLOCK_FREQ); + ret = clk_set_min_rate(vc4_hdmi->hsm_rpm_clock, HSM_MIN_CLOCK_FREQ); if (ret) return ret; - ret = clk_prepare_enable(vc4_hdmi->hsm_clock); + ret = clk_prepare_enable(vc4_hdmi->hsm_rpm_clock); if (ret) return ret; @@ -3352,7 +3377,7 @@ static int vc4_hdmi_runtime_resume(struct device *dev) * case, it will lead to a silent CPU stall. Let's make sure we * prevent such a case. */ - rate = clk_get_rate(vc4_hdmi->hsm_clock); + rate = clk_get_rate(vc4_hdmi->hsm_rpm_clock); if (!rate) { ret = -EINVAL; goto err_disable_clk; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index e3619836ca17..dc3ccd8002a0 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -164,6 +164,7 @@ struct vc4_hdmi { struct clk *cec_clock; struct clk *pixel_clock; struct clk *hsm_clock; + struct clk *hsm_rpm_clock; struct clk *audio_clock; struct clk *pixel_bvb_clock; diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 5c97642ed66a..8fbeecdf2ec4 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -197,8 +197,8 @@ vc4_hvs_get_new_global_state(struct drm_atomic_state *state) struct drm_private_state *priv_state; priv_state = drm_atomic_get_new_private_obj_state(state, &vc4->hvs_channels); - if (IS_ERR(priv_state)) - return ERR_CAST(priv_state); + if (!priv_state) + return ERR_PTR(-EINVAL); return to_vc4_hvs_state(priv_state); } @@ -210,8 +210,8 @@ vc4_hvs_get_old_global_state(struct drm_atomic_state *state) struct drm_private_state *priv_state; priv_state = drm_atomic_get_old_private_obj_state(state, &vc4->hvs_channels); - if (IS_ERR(priv_state)) - return ERR_CAST(priv_state); + if (!priv_state) + return ERR_PTR(-EINVAL); return to_vc4_hvs_state(priv_state); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 06d9e106e3c5..e76976a95a1e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -1097,21 +1097,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs)); /* Pin mksGuestStat user pages and store those in the instance descriptor */ - nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL); + nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat); if (num_pages_stat != nr_pinned_stat) goto err_pin_stat; for (i = 0; i < num_pages_stat; ++i) pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]); - nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL); + nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info); if (num_pages_info != nr_pinned_info) goto err_pin_info; for (i = 0; i < num_pages_info; ++i) pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]); - nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL); + nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs); if (num_pages_strs != nr_pinned_strs) goto err_pin_strs; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 8db61c541a80..e1f36a09c59c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -926,6 +926,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int i; + /* Screen objects won't work if GMR's aren't available */ + if (!dev_priv->has_gmr) + return -ENOSYS; + if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) { return -ENOSYS; } |