diff options
Diffstat (limited to 'drivers/gpu/drm')
229 files changed, 2165 insertions, 1441 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 38408e4e158e..c7b18c52825d 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -39,23 +39,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \ -I$(FULL_AMD_PATH)/amdkfd -subdir-ccflags-y := -Wextra -subdir-ccflags-y += -Wunused -subdir-ccflags-y += -Wmissing-prototypes -subdir-ccflags-y += -Wmissing-declarations -subdir-ccflags-y += -Wmissing-include-dirs -subdir-ccflags-y += -Wold-style-definition -subdir-ccflags-y += -Wmissing-format-attribute -# Need this to avoid recursive variable evaluation issues -cond-flags := $(call cc-option, -Wunused-but-set-variable) \ - $(call cc-option, -Wunused-const-variable) \ - $(call cc-option, -Wstringop-truncation) \ - $(call cc-option, -Wpacked-not-aligned) -subdir-ccflags-y += $(cond-flags) -subdir-ccflags-y += -Wno-unused-parameter -subdir-ccflags-y += -Wno-type-limits -subdir-ccflags-y += -Wno-sign-compare -subdir-ccflags-y += -Wno-missing-field-initializers +# Locally disable W=1 warnings enabled in drm subsystem Makefile subdir-ccflags-y += -Wno-override-init subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e095572458cd..9b1e0ede05a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -237,6 +237,7 @@ extern int sched_policy; extern bool debug_evictions; extern bool no_system_mem_limit; extern int halt_if_hws_hang; +extern uint amdgpu_svm_default_granularity; #else static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ @@ -1082,10 +1083,6 @@ struct amdgpu_device { struct amdgpu_virt virt; - /* link all shadow bo */ - struct list_head shadow_list; - struct mutex shadow_list_lock; - /* record hw reset is performed */ bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 57bda66e85ef..2ca127173135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h return -EINVAL; } - /* udpate aca bank to aca source error_cache first */ + /* update aca bank to aca source error_cache first */ ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 30c5172d208e..9abf29b58ac7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/module.h> -#include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1254a43ec96b..3bc0cbf45bc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) * @inst: xcc's instance number on a multi-XCC setup */ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, - int *wave_cnt, int *vmid, uint32_t inst) + struct kfd_cu_occupancy *queue_cnt, uint32_t inst) { int pipe_idx; int queue_slot; unsigned int reg_val; - + unsigned int wave_cnt; /* * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID * parameters to read out waves in flight. Get VMID if there are * non-zero waves in flight. */ - *vmid = 0xFF; - *wave_cnt = 0; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + - queue_slot); - *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; - if (*wave_cnt != 0) - *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) & - CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst)); + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); + wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; + if (wave_cnt != 0) { + queue_cnt->wave_cnt += wave_cnt; + queue_cnt->doorbell_off = + (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) & + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + } } /** @@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * or more queues running and submitting waves to compute units. * * @adev: Handle of device from which to get number of waves in flight - * @pasid: Identifies the process for which this query call is invoked - * @pasid_wave_cnt: Output parameter updated with number of waves in flight that - * belong to process with given pasid + * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset + * for comparison later. * @max_waves_per_cu: Output parameter updated with maximum number of waves * possible per Compute Unit * @inst: xcc's instance number on a multi-XCC setup @@ -1011,34 +1012,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * number of waves that are in flight for the queue at specified index. The * index ranges from 0 to 7. * - * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID - * of the wave(s). + * If non-zero waves are in flight, store the corresponding doorbell offset + * of the queue, along with the wave count. * - * Determine if VMID from above step maps to pasid provided as parameter. If - * it matches agrregate the wave count. That the VMID will not match pasid is - * a normal condition i.e. a device is expected to support multiple queues - * from multiple proceses. + * Determine if the queue belongs to the process by comparing the doorbell + * offset against the process's queues. If it matches, aggregate the wave + * count for the process. * * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst) +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst) { int qidx; - int vmid; int se_idx; - int sh_idx; int se_cnt; - int sh_cnt; - int wave_cnt; int queue_map; - int pasid_tmp; int max_queue_cnt; - int vmid_wave_cnt = 0; DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); - soc15_grbm_select(adev, 1, 0, 0, 0, inst); + soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst)); /* * Iterate through the shader engines and arrays of the device @@ -1048,51 +1043,38 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, AMDGPU_MAX_QUEUES); max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - sh_cnt = adev->gfx.config.max_sh_per_se; se_cnt = adev->gfx.config.max_shader_engines; for (se_idx = 0; se_idx < se_cnt; se_idx++) { - for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { + amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); + queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS); + + /* + * Assumption: queue map encodes following schema: four + * pipes per each micro-engine, with each pipe mapping + * eight queues. This schema is true for GFX9 devices + * and must be verified for newer device families + */ + for (qidx = 0; qidx < max_queue_cnt; qidx++) { + /* Skip qeueus that are not associated with + * compute functions + */ + if (!test_bit(qidx, cp_queue_bitmap)) + continue; - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst); - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); + if (!(queue_map & (1 << qidx))) + continue; - /* - * Assumption: queue map encodes following schema: four - * pipes per each micro-engine, with each pipe mapping - * eight queues. This schema is true for GFX9 devices - * and must be verified for newer device families - */ - for (qidx = 0; qidx < max_queue_cnt; qidx++) { - - /* Skip qeueus that are not associated with - * compute functions - */ - if (!test_bit(qidx, cp_queue_bitmap)) - continue; - - if (!(queue_map & (1 << qidx))) - continue; - - /* Get number of waves in flight and aggregate them */ - get_wave_count(adev, qidx, &wave_cnt, &vmid, - inst); - if (wave_cnt != 0) { - pasid_tmp = - RREG32(SOC15_REG_OFFSET(OSSSYS, inst, - mmIH_VMID_0_LUT) + vmid); - if (pasid_tmp == pasid) - vmid_wave_cnt += wave_cnt; - } - } + /* Get number of waves in flight and aggregate them */ + get_wave_count(adev, qidx, &cu_occupancy[qidx], + inst); } } amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst); - soc15_grbm_select(adev, 0, 0, 0, 0, inst); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst)); unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ - *pasid_wave_cnt = vmid_wave_cnt; *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * adev->gfx.cu_info.max_waves_per_simd; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 988c50ac3be0..b6a91a552aa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst); +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d5fd371d5ce..ce5ca304dba9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,7 +25,6 @@ #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> -#include <linux/fdtable.h> #include <drm/ttm/ttm_tt.h> #include <drm/drm_exec.h> @@ -818,18 +817,13 @@ static int kfd_mem_export_dmabuf(struct kgd_mem *mem) if (!mem->dmabuf) { struct amdgpu_device *bo_adev; struct dma_buf *dmabuf; - int r, fd; bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file, + dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file, mem->gem_handle, mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? - DRM_RDWR : 0, &fd); - if (r) - return r; - dmabuf = dma_buf_get(fd); - close_fd(fd); - if (WARN_ON_ONCE(IS_ERR(dmabuf))) + DRM_RDWR : 0); + if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf); mem->dmabuf = dmabuf; } @@ -1505,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) } } - ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); + ret = amdgpu_bo_pin(bo, domain); if (ret) pr_err("Error in Pinning BO to domain: %d\n", domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 42e64bce661e..45affc02548c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -87,8 +87,9 @@ static bool check_atom_bios(uint8_t *bios, size_t size) * part of the system bios. On boot, the system bios puts a * copy of the igp rom at the start of vram if a discrete card is * present. + * For SR-IOV, the vbios image is also put in VRAM in the VF. */ -static bool igp_read_bios_from_vram(struct amdgpu_device *adev) +static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) { uint8_t __iomem *bios; resource_size_t vram_base; @@ -284,10 +285,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) acpi_status status; bool found = false; - /* ATRM is for the discrete card only */ - if (adev->flags & AMD_IS_APU) - return false; - /* ATRM is for on-platform devices only */ if (dev_is_removable(&adev->pdev->dev)) return false; @@ -343,11 +340,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return igp_read_bios_from_vram(adev); - else - return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? - false : amdgpu_asic_read_disabled_bios(adev); + return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? + false : amdgpu_asic_read_disabled_bios(adev); } #ifdef CONFIG_ACPI @@ -414,7 +408,36 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) } #endif -bool amdgpu_get_bios(struct amdgpu_device *adev) +static bool amdgpu_get_bios_apu(struct amdgpu_device *adev) +{ + if (amdgpu_acpi_vfct_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VFCT\n"); + goto success; + } + + if (amdgpu_read_bios_from_vram(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); + goto success; + } + + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } + + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); + return false; + +success: + return true; +} + +static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ATRM\n"); @@ -426,7 +449,8 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } - if (igp_read_bios_from_vram(adev)) { + /* this is required for SR-IOV */ + if (amdgpu_read_bios_from_vram(adev)) { dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); goto success; } @@ -455,10 +479,24 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) return false; success: - adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; return true; } +bool amdgpu_get_bios(struct amdgpu_device *adev) +{ + bool found; + + if (adev->flags & AMD_IS_APU) + found = amdgpu_get_bios_apu(adev); + else + found = amdgpu_get_bios_dgpu(adev); + + if (found) + adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; + + return found; +} + /* helper function for soc15 and onwards to read bios from rom */ bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e479ff33a0a7..fd853dc843e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4107,9 +4107,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->wb.lock); - INIT_LIST_HEAD(&adev->shadow_list); - mutex_init(&adev->shadow_list_lock); - INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); @@ -5030,80 +5027,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) } /** - * amdgpu_device_recover_vram - Recover some VRAM contents - * - * @adev: amdgpu_device pointer - * - * Restores the contents of VRAM buffers from the shadows in GTT. Used to - * restore things like GPUVM page tables after a GPU reset where - * the contents of VRAM might be lost. - * - * Returns: - * 0 on success, negative error code on failure. - */ -static int amdgpu_device_recover_vram(struct amdgpu_device *adev) -{ - struct dma_fence *fence = NULL, *next = NULL; - struct amdgpu_bo *shadow; - struct amdgpu_bo_vm *vmbo; - long r = 1, tmo; - - if (amdgpu_sriov_runtime(adev)) - tmo = msecs_to_jiffies(8000); - else - tmo = msecs_to_jiffies(100); - - dev_info(adev->dev, "recover vram bo from shadow start\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { - /* If vm is compute context or adev is APU, shadow will be NULL */ - if (!vmbo->shadow) - continue; - shadow = vmbo->shadow; - - /* No need to recover an evicted BO */ - if (!shadow->tbo.resource || - shadow->tbo.resource->mem_type != TTM_PL_TT || - shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || - shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) - continue; - - r = amdgpu_bo_restore_shadow(shadow, &next); - if (r) - break; - - if (fence) { - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - fence = next; - if (tmo == 0) { - r = -ETIMEDOUT; - break; - } else if (tmo < 0) { - r = tmo; - break; - } - } else { - fence = next; - } - } - mutex_unlock(&adev->shadow_list_lock); - - if (fence) - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - - if (r < 0 || tmo <= 0) { - dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); - return -EIO; - } - - dev_info(adev->dev, "recover vram bo from shadow done\n"); - return 0; -} - - -/** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu_device pointer @@ -5165,12 +5088,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) amdgpu_inc_vram_lost(adev); - r = amdgpu_device_recover_vram(adev); - } - if (r) - return r; /* need to be called during full access so we can't do it later like * bare-metal does. @@ -5569,9 +5488,7 @@ out: } } - if (!r) - r = amdgpu_device_recover_vram(tmp_adev); - else + if (r) tmp_adev->asic_reset_res = r; } @@ -6189,7 +6106,7 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, p2p_addressable = !(adev->gmc.aper_base & address_mask || aper_limit & address_mask); } - return is_large_bar && p2p_access && p2p_addressable; + return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; #else return false; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 092ec11258cd..b119d27271c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -233,6 +233,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, } if (!adev->enable_virtual_display) { + new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev, new_abo->flags)); if (unlikely(r != 0)) { @@ -1474,7 +1475,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && - connector->display_info.is_hdmi && + connector && connector->display_info.is_hdmi && amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; @@ -1759,6 +1760,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) r = amdgpu_bo_reserve(aobj, true); if (r == 0) { + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); if (r != 0) dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8dee7c62c801..81d9877c8735 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -117,9 +117,10 @@ * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support + * - 3.59.0 - Cleared VRAM */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 58 +#define KMS_DRIVER_MINOR 59 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -169,6 +170,16 @@ uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; bool enforce_isolation; + +/* Specifies the default granularity for SVM, used in buffer + * migration and restoration of backing memory when handling + * recoverable page faults. + * + * The value is given as log(numPages(buffer)); for a 2 MiB + * buffer it computes to be 9 + */ +uint amdgpu_svm_default_granularity = 9; + /* * OverDrive(bit 14) disabled by default * GFX DCS(bit 19) disabled by default @@ -321,6 +332,13 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); /** + * DOC: svm_default_granularity (uint) + * Used in buffer migration and handling of recoverable page faults + */ +MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB"); +module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644); + +/** * DOC: lockup_timeout (string) * Set GPU scheduler timeout value in ms. * @@ -2914,6 +2932,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, #endif + .fop_flags = FOP_UNSIGNED_OFFSET, }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 0e617dff8765..1a5df8b94661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -43,8 +43,6 @@ #include "amdgpu_hmm.h" #include "amdgpu_xgmi.h" -static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; - static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -87,11 +85,11 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = { static void amdgpu_gem_object_free(struct drm_gem_object *gobj) { - struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); + struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj); - if (robj) { - amdgpu_hmm_unregister(robj); - amdgpu_bo_unref(&robj); + if (aobj) { + amdgpu_hmm_unregister(aobj); + ttm_bo_put(&aobj->tbo); } } @@ -126,7 +124,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bo = &ubo->bo; *obj = &bo->tbo.base; - (*obj)->funcs = &amdgpu_gem_object_funcs; return 0; } @@ -295,7 +292,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str return drm_gem_ttm_mmap(obj, vma); } -static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { +const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { .free = amdgpu_gem_object_free, .open = amdgpu_gem_object_open, .close = amdgpu_gem_object_close, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f30264782ba2..3a8f57900a3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -33,6 +33,8 @@ #define AMDGPU_GEM_DOMAIN_MAX 0x3 #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) +extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs; + unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b779d47a546a..83e54697f0ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1397,14 +1397,23 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - long timeout = msecs_to_jiffies(1000); - struct dma_fence *f = NULL; + struct drm_gpu_scheduler *sched = &ring->sched; + struct drm_sched_entity entity; + struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; int i, r; - r = amdgpu_job_alloc_with_ib(adev, NULL, NULL, - 64, AMDGPU_IB_POOL_DIRECT, + /* Initialize the scheduler entity */ + r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + dev_err(adev->dev, "Failed setting up GFX kernel entity.\n"); + goto err; + } + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, + 64, 0, &job); if (r) goto err; @@ -1416,24 +1425,18 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) ib->ptr[i] = ring->funcs->nop; ib->length_dw = ring->funcs->align_mask + 1; - r = amdgpu_job_submit_direct(job, ring, &f); - if (r) - goto err_free; + f = amdgpu_job_submit(job); - r = dma_fence_wait_timeout(f, false, timeout); - if (r == 0) - r = -ETIMEDOUT; - else if (r > 0) - r = 0; + r = dma_fence_wait(f, false); + if (r) + goto err; - amdgpu_ib_free(adev, ib, f); dma_fence_put(f); + /* Clean up the scheduler entity */ + drm_sched_entity_destroy(&entity); return 0; -err_free: - amdgpu_job_free(job); - amdgpu_ib_free(adev, ib, f); err: return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 570820edcfda..fffa4430784f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -107,8 +107,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) /* * Do the coredump immediately after a job timeout to get a very * close dump/snapshot/representation of GPU's current error status + * Skip it for SRIOV, since VF FLR will be triggered by host driver + * before job timeout */ - amdgpu_job_core_dump(adev, job); + if (!amdgpu_sriov_vf(adev)) + amdgpu_job_core_dump(adev, job); if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f7d5d4f08a53..10b61ff63802 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -832,6 +832,7 @@ int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, struct mes_reset_queue_input queue_input; int r; + queue_input.queue_type = queue_type; queue_input.use_mmio = true; queue_input.me_id = me_id; queue_input.pipe_id = pipe_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e32161f6b67a..44819cdba7fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -77,24 +77,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_destroy(tbo); } -static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo; - struct amdgpu_bo_vm *vmbo; - - bo = shadow_bo->parent; - vmbo = to_amdgpu_bo_vm(bo); - /* in case amdgpu_device_recover_vram got NULL of bo->parent */ - if (!list_empty(&vmbo->shadow_list)) { - mutex_lock(&adev->shadow_list_lock); - list_del_init(&vmbo->shadow_list); - mutex_unlock(&adev->shadow_list_lock); - } - - amdgpu_bo_destroy(tbo); -} - /** * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo * @bo: buffer object to be checked @@ -108,8 +90,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_bo_destroy || - bo->destroy == &amdgpu_bo_user_destroy || - bo->destroy == &amdgpu_bo_vm_destroy) + bo->destroy == &amdgpu_bo_user_destroy) return true; return false; @@ -583,6 +564,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (bo == NULL) return -ENOMEM; drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size); + bo->tbo.base.funcs = &amdgpu_gem_object_funcs; bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : bp->domain; @@ -723,52 +705,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, } /** - * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list - * - * @vmbo: BO that will be inserted into the shadow list - * - * Insert a BO to the shadow list. - */ -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev); - - mutex_lock(&adev->shadow_list_lock); - list_add_tail(&vmbo->shadow_list, &adev->shadow_list); - vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo); - vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy; - mutex_unlock(&adev->shadow_list_lock); -} - -/** - * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow - * - * @shadow: &amdgpu_bo shadow to be restored - * @fence: dma_fence associated with the operation - * - * Copies a buffer object's shadow content back to the object. - * This is used for recovering a buffer from its shadow in case of a gpu - * reset where vram context may be lost. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) - -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - uint64_t shadow_addr, parent_addr; - - shadow_addr = amdgpu_bo_gpu_offset(shadow); - parent_addr = amdgpu_bo_gpu_offset(shadow->parent); - - return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, - amdgpu_bo_size(shadow), NULL, fence, - true, false, 0); -} - -/** * amdgpu_bo_kmap - map an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be mapped * @ptr: kernel virtual address to be returned @@ -851,7 +787,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) if (bo == NULL) return NULL; - ttm_bo_get(&bo->tbo); + drm_gem_object_get(&bo->tbo.base); return bo; } @@ -863,40 +799,30 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) */ void amdgpu_bo_unref(struct amdgpu_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - tbo = &((*bo)->tbo); - ttm_bo_put(tbo); + drm_gem_object_put(&(*bo)->tbo.base); *bo = NULL; } /** - * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be pinned * @domain: domain to be pinned to - * @min_offset: the start of requested address range - * @max_offset: the end of requested address range * - * Pins the buffer object according to requested domain and address range. If - * the memory is unbound gart memory, binds the pages into gart table. Adjusts - * pin_count and pin_size accordingly. + * Pins the buffer object according to requested domain. If the memory is + * unbound gart memory, binds the pages into gart table. Adjusts pin_count and + * pin_size accordingly. * * Pinning means to lock pages in memory along with keeping them at a fixed * offset. It is required when a buffer can not be moved, for example, when * a display buffer is being scanned out. * - * Compared with amdgpu_bo_pin(), this function gives more flexibility on - * where to pin a buffer if there are specific restrictions on where a buffer - * must be located. - * * Returns: * 0 for success or a negative error code on failure. */ -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset) +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { false, false }; @@ -905,9 +831,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) return -EPERM; - if (WARN_ON_ONCE(min_offset > max_offset)) - return -EINVAL; - /* Check domain to be pinned to against preferred domains */ if (bo->preferred_domains & domain) domain = bo->preferred_domains & domain; @@ -933,14 +856,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; ttm_bo_pin(&bo->tbo); - - if (max_offset != 0) { - u64 domain_start = amdgpu_ttm_domain_start(adev, - mem_type); - WARN_ON_ONCE(max_offset < - (amdgpu_bo_gpu_offset(bo) - domain_start)); - } - return 0; } @@ -957,17 +872,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_bo_placement_from_domain(bo, domain); for (i = 0; i < bo->placement.num_placement; i++) { - unsigned int fpfn, lpfn; - - fpfn = min_offset >> PAGE_SHIFT; - lpfn = max_offset >> PAGE_SHIFT; - - if (fpfn > bo->placements[i].fpfn) - bo->placements[i].fpfn = fpfn; - if (!bo->placements[i].lpfn || - (lpfn && lpfn < bo->placements[i].lpfn)) - bo->placements[i].lpfn = lpfn; - if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && bo->placements[i].mem_type == TTM_PL_VRAM) bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; @@ -994,24 +898,6 @@ error: } /** - * amdgpu_bo_pin - pin an &amdgpu_bo buffer object - * @bo: &amdgpu_bo buffer object to be pinned - * @domain: domain to be pinned to - * - * A simple wrapper to amdgpu_bo_pin_restricted(). - * Provides a simpler API for buffers that do not have any strict restrictions - * on where a buffer must be located. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) -{ - bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - return amdgpu_bo_pin_restricted(bo, domain, 0, 0); -} - -/** * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be unpinned * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d7e27957013f..717e47b46167 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -136,8 +136,6 @@ struct amdgpu_bo_user { struct amdgpu_bo_vm { struct amdgpu_bo bo; - struct amdgpu_bo *shadow; - struct list_head shadow_list; struct amdgpu_vm_bo_base entries[]; }; @@ -275,22 +273,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; } -/** - * amdgpu_bo_shadowed - check if the BO is shadowed - * - * @bo: BO to be tested. - * - * Returns: - * NULL if not shadowed or else return a BO pointer. - */ -static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) -{ - if (bo->tbo.type == ttm_bo_type_kernel) - return to_amdgpu_bo_vm(bo)->shadow; - - return NULL; -} - bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); @@ -322,8 +304,6 @@ void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset); void amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); @@ -349,9 +329,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats); -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, - struct dma_fence **fence); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 0bb2466d539a..675aa138ea11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -94,7 +94,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n ref_div_max = min(128 / post_div, ref_div_max); /* get matching reference and feedback divider */ - *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max); + *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max); *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 189574d53ebd..0b28b2cf1517 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2853,7 +2853,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) if (ret) return ret; - /* Start rlc autoload after psp recieved all the gfx firmware */ + /* Start rlc autoload after psp received all the gfx firmware */ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); @@ -3425,9 +3425,11 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; - int err = 0; + const struct psp_firmware_header_v2_1 *sos_hdr_v2_1; + int fw_index, fw_bin_count, start_index = 0; + const struct psp_fw_bin_desc *fw_bin; uint8_t *ucode_array_start_addr; - int fw_index = 0; + int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); if (err) @@ -3478,15 +3480,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) case 2: sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; - if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { + fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); + + if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) { dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); err = -EINVAL; goto out; } - for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { - err = parse_sos_bin_descriptor(psp, - &sos_hdr_v2_0->psp_fw_bin[fw_index], + if (sos_hdr_v2_0->header.header_version_minor == 1) { + sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data; + + fw_bin = sos_hdr_v2_1->psp_fw_bin; + + if (psp_is_aux_sos_load_required(psp)) + start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + else + fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + + } else { + fw_bin = sos_hdr_v2_0->psp_fw_bin; + } + + for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) { + err = parse_sos_bin_descriptor(psp, fw_bin + fw_index, sos_hdr_v2_0); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 74a96516c913..e8abbbcb4326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -138,6 +138,7 @@ struct psp_funcs { int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); bool (*get_ras_capability)(struct psp_context *psp); + bool (*is_aux_sos_load_required)(struct psp_context *psp); }; struct ta_funcs { @@ -464,6 +465,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->fatal_error_recovery_quirk ? \ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0) +#define psp_is_aux_sos_load_required(psp) \ + ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 61a2f386d9fb..1a1395c5fff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -882,7 +882,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, if (ret) return ret; - /* gfx block ras dsiable cmd must send to ras-ta */ + /* gfx block ras disable cmd must send to ras-ta */ if (head->block == AMDGPU_RAS_BLOCK__GFX) con->features |= BIT(head->block); @@ -3468,6 +3468,11 @@ init_ras_enabled_flag: /* aca is disabled by default */ adev->aca.is_enabled = false; + + /* bad page feature is not applicable to specific app platform */ + if (adev->gmc.is_app_apu && + amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0)) + amdgpu_bad_page_threshold = 0; } static void amdgpu_ras_counte_dw(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index aab8077e5098..f28f6b4ba765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -58,7 +58,7 @@ #define EEPROM_I2C_MADDR_4 0x40000 /* - * The 2 macros bellow represent the actual size in bytes that + * The 2 macros below represent the actual size in bytes that * those entities occupy in the EEPROM memory. * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which * uses uint64 to store 6b fields such as retired_page. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 863b2a34b2d6..b0a8abc7a8ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -22,7 +22,6 @@ * Authors: Andres Rodriguez <andresx7@gmail.com> */ -#include <linux/fdtable.h> #include <linux/file.h> #include <linux/pid.h> @@ -43,10 +42,10 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, uint32_t id; int r; - if (!f.file) + if (!fd_file(f)) return -EINVAL; - r = amdgpu_file_to_fpriv(f.file, &fpriv); + r = amdgpu_file_to_fpriv(fd_file(f), &fpriv); if (r) { fdput(f); return r; @@ -72,10 +71,10 @@ static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev, struct amdgpu_ctx *ctx; int r; - if (!f.file) + if (!fd_file(f)) return -EINVAL; - r = amdgpu_file_to_fpriv(f.file, &fpriv); + r = amdgpu_file_to_fpriv(fd_file(f), &fpriv); if (r) { fdput(f); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index bdf1ef825d89..c586ab4c911b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -260,6 +260,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return 0; } +/** + * amdgpu_sync_kfd - sync to KFD fences + * + * @sync: sync object to add KFD fences to + * @resv: reservation object with KFD fences + * + * Extract all KFD fences and add them to the sync object. + */ +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv) +{ + struct dma_resv_iter cursor; + struct dma_fence *f; + int r = 0; + + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, f) { + void *fence_owner = amdgpu_sync_get_owner(f); + + if (fence_owner != AMDGPU_FENCE_OWNER_KFD) + continue; + + r = amdgpu_sync_fence(sync, f); + if (r) + break; + } + dma_resv_iter_end(&cursor); + + return r; +} + /* Free the entry back to the slab */ static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index cf1e9e858efd..e3272dce798d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b8bc7fa8c375..74adb983ab03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1970,7 +1970,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); - /* Initiailize doorbell pool on PCI BAR */ + /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { DRM_ERROR("Failed initializing doorbell heap.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 5bc37acd3981..4e23419b92d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 { struct psp_fw_bin_desc psp_fw_bin[]; }; +/* version_major=2, version_minor=1 */ +struct psp_firmware_header_v2_1 { + struct common_firmware_header header; + uint32_t psp_fw_bin_count; + uint32_t psp_aux_fw_bin_index; + struct psp_fw_bin_desc psp_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +434,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; struct psp_firmware_header_v2_0 psp_v2_0; + struct psp_firmware_header_v2_0 psp_v2_1; struct ta_firmware_header_v1_0 ta; struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; @@ -447,7 +456,7 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; -#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) +#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2) /* * fw loading support diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index e5f508d34ed8..d4c2afafbb73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -338,6 +338,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, else domain = AMDGPU_GEM_DOMAIN_VRAM; + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e20d19ae01b2..6005280f5f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -465,7 +465,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, { uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; @@ -486,16 +485,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo; - shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); if (r) return r; - if (shadow) { - r = validate(param, shadow); - if (r) - return r; - } if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -844,7 +837,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); + r = vm->update_funcs->prepare(¶ms, NULL); if (r) goto error; @@ -908,10 +901,12 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, { struct amdgpu_vm *vm = params->vm; - if (!fence || !*fence) + tlb_cb->vm = vm; + if (!fence || !*fence) { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); return; + } - tlb_cb->vm = vm; if (!dma_fence_add_callback(*fence, &tlb_cb->cb, amdgpu_vm_tlb_seq_cb)) { dma_fence_put(vm->last_tlb_flush); @@ -939,7 +934,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed * @allow_override: change MTYPE for local NUMA nodes - * @resv: fences we need to sync to + * @sync: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries @@ -955,16 +950,16 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, * 0 for success, negative erro code for failure. */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, bool allow_override, - struct dma_resv *resv, uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, uint64_t vram_base, + bool immediate, bool unlocked, bool flush_tlb, + bool allow_override, struct amdgpu_sync *sync, + uint64_t start, uint64_t last, uint64_t flags, + uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_tlb_seq_struct *tlb_cb; struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; - enum amdgpu_sync_mode sync_mode; int r, idx; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -997,14 +992,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.allow_override = allow_override; INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); - /* Implicitly sync to command submissions in the same VM before - * unmapping. Sync to moving fences before mapping. - */ - if (!(flags & AMDGPU_PTE_VALID)) - sync_mode = AMDGPU_SYNC_EQ_OWNER; - else - sync_mode = AMDGPU_SYNC_EXPLICIT; - amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; @@ -1019,7 +1006,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, dma_fence_put(tmp); } - r = vm->update_funcs->prepare(¶ms, resv, sync_mode); + r = vm->update_funcs->prepare(¶ms, sync); if (r) goto error_free; @@ -1161,23 +1148,36 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; + struct dma_fence **last_update; dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; - struct dma_fence **last_update; + struct amdgpu_sync sync; bool flush_tlb = clear; - bool uncached; - struct dma_resv *resv; uint64_t vram_base; uint64_t flags; + bool uncached; int r; + amdgpu_sync_create(&sync); if (clear || !bo) { mem = NULL; - resv = vm->root.bo->tbo.base.resv; + + /* Implicitly sync to command submissions in the same VM before + * unmapping. + */ + r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv, + AMDGPU_SYNC_EQ_OWNER, vm); + if (r) + goto error_free; + if (bo) { + r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv); + if (r) + goto error_free; + } + } else { struct drm_gem_object *obj = &bo->tbo.base; - resv = bo->tbo.base.resv; if (obj->import_attach && bo_va->is_xgmi) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; @@ -1191,6 +1191,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (mem && (mem->mem_type == TTM_PL_TT || mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; + + /* Implicitly sync to moving fences before mapping anything */ + r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, + AMDGPU_SYNC_EXPLICIT, vm); + if (r) + goto error_free; } if (bo) { @@ -1240,12 +1246,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, - !uncached, resv, mapping->start, mapping->last, - update_flags, mapping->offset, - vram_base, mem, pages_addr, - last_update); + !uncached, &sync, mapping->start, + mapping->last, update_flags, + mapping->offset, vram_base, mem, + pages_addr, last_update); if (r) - return r; + goto error_free; } /* If the BO is not in its preferred location add it back to @@ -1273,7 +1279,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_mapping(mapping); } - return 0; +error_free: + amdgpu_sync_free(&sync); + return r; } /** @@ -1420,25 +1428,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence) { - struct dma_resv *resv = vm->root.bo->tbo.base.resv; struct amdgpu_bo_va_mapping *mapping; - uint64_t init_pte_value = 0; struct dma_fence *f = NULL; + struct amdgpu_sync sync; int r; + + /* + * Implicitly sync to command submissions in the same VM before + * unmapping. + */ + amdgpu_sync_create(&sync); + r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv, + AMDGPU_SYNC_EQ_OWNER, vm); + if (r) + goto error_free; + while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); r = amdgpu_vm_update_range(adev, vm, false, false, true, false, - resv, mapping->start, mapping->last, - init_pte_value, 0, 0, NULL, NULL, - &f); + &sync, mapping->start, mapping->last, + 0, 0, 0, NULL, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); - return r; + goto error_free; } } @@ -1449,7 +1466,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, dma_fence_put(f); } - return 0; +error_free: + amdgpu_sync_free(&sync); + return r; } @@ -2129,10 +2148,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base; - /* shadow bo doesn't have bo base, its validation needs its parent */ - if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) - bo = bo->parent; - for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; @@ -2224,7 +2239,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + (1 << 30) - 1) >> 30; vm_size = roundup_pow_of_two( - min(max(phys_ram_gb * 3, min_vm_size), max_size)); + clamp(phys_ram_gb * 3, min_vm_size, max_size)); } adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; @@ -2403,7 +2418,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { - struct amdgpu_ip_block *ip_block; struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; int r, i; @@ -2435,11 +2449,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); - /* use CPU for page table update if SDMA is unavailable */ - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); - if (!ip_block || ip_block->status.valid == false) - vm->use_cpu_for_update = true; - DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2468,7 +2477,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); if (r) { - amdgpu_bo_unref(&root->shadow); amdgpu_bo_unref(&root_bo); goto error_free_delayed; } @@ -2561,11 +2569,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; - /* Free the shadow bo for compute VM */ - amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); - - goto unreserve_bo; - unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r; @@ -2770,6 +2773,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM + * @ts: Timestamp of the fault * @vmid: VMID, only used for GFX 9.4.3. * @node_id: Node_id received in IH cookie. Only applicable for * GFX 9.4.3. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d12d66dca8e9..52dd7cdfdc81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -304,8 +304,8 @@ struct amdgpu_vm_update_params { struct amdgpu_vm_update_funcs { int (*map_table)(struct amdgpu_bo_vm *bo); - int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv, - enum amdgpu_sync_mode sync_mode); + int (*prepare)(struct amdgpu_vm_update_params *p, + struct amdgpu_sync *sync); int (*update)(struct amdgpu_vm_update_params *p, struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); @@ -505,9 +505,10 @@ int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, bool allow_override, - struct dma_resv *resv, uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, uint64_t vram_base, + bool immediate, bool unlocked, bool flush_tlb, + bool allow_override, struct amdgpu_sync *sync, + uint64_t start, uint64_t last, uint64_t flags, + uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 3895bd7d176a..0c1ef5850a5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -39,20 +39,18 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table) * amdgpu_vm_cpu_prepare - prepare page table update with the CPU * * @p: see amdgpu_vm_update_params definition - * @resv: reservation object with embedded fence - * @sync_mode: synchronization mode + * @sync: sync obj with fences to wait on * * Returns: * Negativ errno, 0 for success. */ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, - struct dma_resv *resv, - enum amdgpu_sync_mode sync_mode) + struct amdgpu_sync *sync) { - if (!resv) + if (!sync) return 0; - return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true); + return amdgpu_sync_wait(sync, true); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index e39d6e7643bf..f78a0434a48f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; @@ -403,7 +395,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); + r = vm->update_funcs->prepare(¶ms, NULL); if (r) goto exit; @@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; unsigned int num_entries; - int r; memset(&bp, 0, sizeof(bp)); @@ -484,42 +473,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (vm->root.bo) bp.resv = vm->root.bo->tbo.base.resv; - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_pt_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - bp.xcp_id_plus1 = xcp_id + 1; - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; + return amdgpu_bo_create_vm(adev, &bp, vmbo); } /** @@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, return 0; error_free_pt: - amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt_bo); return r; } @@ -581,17 +534,10 @@ error_free_pt: */ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) { - struct amdgpu_bo *shadow; - if (!entry->bo) return; entry->bo->vm_bo = NULL; - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); spin_lock(&entry->vm->status_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 9b748d7058b5..46d9fb433ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -35,16 +35,7 @@ */ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) { - int r; - - r = amdgpu_ttm_alloc_gart(&table->bo.tbo); - if (r) - return r; - - if (table->shadow) - r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); - - return r; + return amdgpu_ttm_alloc_gart(&table->bo.tbo); } /* Allocate a new job for @count PTE updates */ @@ -77,32 +68,24 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, * amdgpu_vm_sdma_prepare - prepare SDMA command submission * * @p: see amdgpu_vm_update_params definition - * @resv: reservation object with embedded fence - * @sync_mode: synchronization mode + * @sync: amdgpu_sync object with fences to wait for * * Returns: * Negativ errno, 0 for success. */ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, - struct dma_resv *resv, - enum amdgpu_sync_mode sync_mode) + struct amdgpu_sync *sync) { - struct amdgpu_sync sync; int r; r = amdgpu_vm_sdma_alloc_job(p, 0); if (r) return r; - if (!resv) + if (!sync) return 0; - amdgpu_sync_create(&sync); - r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm); - if (!r) - r = amdgpu_sync_push_to_job(&sync, p->job); - amdgpu_sync_free(&sync); - + r = amdgpu_sync_push_to_job(sync, p->job); if (r) { p->num_dw_left = 0; amdgpu_job_free(p->job); @@ -273,17 +256,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (!p->pages_addr) { /* set page commands needed */ - if (vmbo->shadow) - amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr, - count, incr, flags); amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, incr, flags); return 0; } /* copy commands needed */ - ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * - (vmbo->shadow ? 2 : 1); + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; /* for padding */ ndw -= 7; @@ -298,8 +277,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, pte[i] |= flags; } - if (vmbo->shadow) - amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes); amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); pe += nptes * 8; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 90138bc5f03d..32775260556f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -180,6 +180,6 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from) #define for_each_xcp(xcp_mgr, xcp, i) \ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \ - xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) + ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 26e2188101e7..5e8833e4fed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -94,8 +94,6 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (aqua_vanjaram_xcp_vcn_shared(adev)) - inst_mask = 1 << (inst_idx * 2); break; default: DRM_ERROR("Not support ring type %d!", ring->funcs->type); @@ -105,6 +103,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, + ring->xcp_id); if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; @@ -394,38 +394,31 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x struct amdgpu_xcp_ip *ip) { struct amdgpu_device *adev = xcp_mgr->adev; + int num_sdma, num_vcn, num_shared_vcn, num_xcp; int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp; - int num_sdma, num_vcn; num_sdma = adev->sdma.num_instances; num_vcn = adev->vcn.num_vcn_inst; + num_shared_vcn = 1; + + num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp; switch (xcp_mgr->mode) { case AMDGPU_SPX_PARTITION_MODE: - num_sdma_xcp = num_sdma; - num_vcn_xcp = num_vcn; - break; case AMDGPU_DPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 2; - num_vcn_xcp = num_vcn / 2; - break; case AMDGPU_TPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 3; - num_vcn_xcp = num_vcn / 3; - break; case AMDGPU_QPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 4; - num_vcn_xcp = num_vcn / 4; - break; case AMDGPU_CPX_PARTITION_MODE: - num_sdma_xcp = 2; - num_vcn_xcp = num_vcn ? 1 : 0; + num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp); + num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp); break; default: return -EINVAL; } - num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + if (num_vcn && num_xcp > num_vcn) + num_shared_vcn = num_xcp / num_vcn; switch (ip_id) { case AMDGPU_XCP_GFXHUB: @@ -441,7 +434,8 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x ip->ip_funcs = &sdma_v4_4_2_xcp_funcs; break; case AMDGPU_XCP_VCN: - ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id); + ip->inst_mask = + XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn); /* TODO : Assign IP funcs */ break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 09715b506468..81d195d366ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -27,7 +27,7 @@ #include <linux/slab.h> #include <linux/string_helpers.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/drm_util.h> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 742adbc460c9..70c1399f738d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1881,6 +1881,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2401,6 +2402,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 8d46ebadfa46..f154c24499c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1931,6 +1931,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2485,6 +2486,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index f08dc6a3886f..a7fcb135827f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1861,6 +1861,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2321,6 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a6a3adf2ae13..77ac3f114d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1828,6 +1828,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2320,6 +2321,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index d1357c01eb39..47b47d21f464 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,12 +202,16 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; -static const struct soc15_reg_golden golden_settings_gc_12_0[] = { +static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000), +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3495,10 +3499,14 @@ static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + if (adev->rev_id == 0) soc15_program_register_sequence(adev, - golden_settings_gc_12_0, - (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + golden_settings_gc_12_0_rev0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0)); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8cf5d7925b51..23f0573ae47b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1345,6 +1345,10 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, + /* https://bbs.openkylin.top/t/topic/171497 */ + { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 }, + /* HP 705G4 DM with R5 2400G */ + { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 408e5600bb61..c100845409f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1701,7 +1701,15 @@ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); } else { WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, - (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); adev->gfx.kiq[xcc_id].ring.sched.ready = false; } udelay(50); @@ -2240,6 +2248,8 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id); if (r) return r; + } else { + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); } r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); @@ -2299,12 +2309,6 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, - int xcc_id) -{ - gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); -} - static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) { if (amdgpu_gfx_disable_kcq(adev, xcc_id)) @@ -2336,7 +2340,7 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) } gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); - gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); } static int gfx_v9_4_3_hw_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 6c1891889c4d..d4f72e47ae9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -153,7 +153,7 @@ static void imu_v11_0_setup(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); } - //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB + //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); imu_reg_val |= 0x10007; WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 71f43a5c7f72..6e0e88076224 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v1_0.h" @@ -34,6 +35,9 @@ static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring); +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) { @@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4))); amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); @@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, .set_wptr = jpeg_v1_0_decode_ring_set_wptr, + .parse_cs = jpeg_v1_dec_ring_parse_cs, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + @@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); } + +/** + * jpeg_v1_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + int ret = 0; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */ + return -EINVAL; + + if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END) + continue; + + switch (type) { + case PACKETJ_TYPE0: + if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW && + reg != JPEG_V1_REG_CTX_INDEX && + reg != JPEG_V1_REG_CTX_DATA) { + ret = -EINVAL; + } + break; + case PACKETJ_TYPE1: + if (reg != JPEG_V1_REG_CTX_DATA) + ret = -EINVAL; + break; + case PACKETJ_TYPE3: + if (reg != JPEG_V1_REG_SOFT_RESET) + ret = -EINVAL; + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] != CP_PACKETJ_NOP) + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + if (ret) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + break; + } + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h index bbf33a6a3972..9654d22e0376 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h @@ -29,4 +29,15 @@ int jpeg_v1_0_sw_init(void *handle); void jpeg_v1_0_sw_fini(void *handle); void jpeg_v1_0_start(struct amdgpu_device *adev, int mode); +#define JPEG_V1_REG_RANGE_START 0x8000 +#define JPEG_V1_REG_RANGE_END 0x803f + +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238 +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239 +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b +#define JPEG_V1_REG_CTX_INDEX 0x8328 +#define JPEG_V1_REG_CTX_DATA 0x8329 +#define JPEG_V1_REG_SOFT_RESET 0x83a0 + #endif /*__JPEG_V1_0_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 98aa3ccd0d20..41c0f8750dc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" @@ -538,7 +539,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -764,6 +769,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -810,3 +816,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .rev = 0, .funcs = &jpeg_v2_0_ip_funcs, }; + +/** + * jpeg_v2_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 654e43e83e2c..63fadda7a673 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -45,6 +45,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index d8ef95c847c2..eedb9a829d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -662,6 +662,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -691,6 +692,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 31cfa3ce6528..b1e7fd25afbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -560,6 +560,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 3dac8f259d7f..6c5c1a68a9b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -727,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h index 07d36c2abd6b..47638fd4d4e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h @@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block { }; extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block; - #endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 6ae5a784e187..86958cb2c2ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,9 +23,9 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" -#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "mmsch_v4_0_3.h" @@ -59,6 +59,12 @@ static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG7_DECODE }; +static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev) +{ + return amdgpu_sriov_vf(adev) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)); +} + /** * jpeg_v4_0_3_early_init - set function pointers * @@ -734,32 +740,20 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); amdgpu_ring_write(ring, 0); - if (ring->adev->jpeg.inst[ring->me].aid_id) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x4); - } else { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x3fbc); - if (ring->adev->jpeg.inst[ring->me].aid_id) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x0); - } else { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x1); + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); amdgpu_ring_write(ring, 0); } @@ -834,8 +828,8 @@ void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, { uint32_t reg_offset; - /* For VF, only local offsets should be used */ - if (amdgpu_sriov_vf(ring->adev)) + /* Use normalized offsets if required */ + if (jpeg_v4_0_3_normalizn_reqd(ring->adev)) reg = NORMALIZE_JPEG_REG_OFFSET(reg); reg_offset = (reg << 2); @@ -881,8 +875,8 @@ void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint { uint32_t reg_offset; - /* For VF, only local offsets should be used */ - if (amdgpu_sriov_vf(ring->adev)) + /* Use normalized offsets if required */ + if (jpeg_v4_0_3_normalizn_reqd(ring->adev)) reg = NORMALIZE_JPEG_REG_OFFSET(reg); reg_offset = (reg << 2); @@ -1089,7 +1083,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, - .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1254,56 +1248,3 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } - -/** - * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser - * - * @parser: Command submission parser context - * @job: the job to parse - * @ib: the IB to parse - * - * Parse the command stream, return -EINVAL for invalid packet, - * 0 otherwise - */ -int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - uint32_t i, reg, res, cond, type; - struct amdgpu_device *adev = parser->adev; - - for (i = 0; i < ib->length_dw ; i += 2) { - reg = CP_PACKETJ_GET_REG(ib->ptr[i]); - res = CP_PACKETJ_GET_RES(ib->ptr[i]); - cond = CP_PACKETJ_GET_COND(ib->ptr[i]); - type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); - - if (res) /* only support 0 at the moment */ - return -EINVAL; - - switch (type) { - case PACKETJ_TYPE0: - if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE3: - if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE6: - if (ib->ptr[i] == CP_PACKETJ_NOP) - continue; - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - default: - dev_err(adev->dev, "Unknown packet type %d !\n", type); - return -EINVAL; - } - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 71c54b294e15..747a3e5f6856 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,9 +46,6 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 -#define JPEG_REG_RANGE_START 0x4000 -#define JPEG_REG_RANGE_END 0x41c2 - extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -65,7 +62,5 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); -int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib); + #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index f96ac6bce526..44eeed445ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -768,6 +768,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index f4daff90c770..d662aa841f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -26,6 +26,7 @@ #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "vcn/vcn_5_0_0_offset.h" @@ -646,7 +647,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, - .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 0f055d1b1da6..231a3d490ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -161,7 +161,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; - signed long timeout = 3000000; /* 3000 ms */ + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; @@ -415,7 +415,7 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ /* wait till dequeue take effects */ for (i = 0; i < adev->usec_timeout; i++) { if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; + break; udelay(1); } if (i >= adev->usec_timeout) { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e499b2857a01..8d27421689c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -146,7 +146,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; - signed long timeout = 3000000; /* 3000 ms */ + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring[pipe]; spinlock_t *ring_lock = &mes->ring_lock[pipe]; @@ -479,6 +479,11 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, union MESAPI__MISC misc_pkt; int pipe; + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + memset(&misc_pkt, 0, sizeof(misc_pkt)); misc_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -513,6 +518,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; break; case MES_MISC_OP_SET_SHADER_DEBUGGER: + pipe = AMDGPU_MES_SCHED_PIPE; misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; misc_pkt.set_shader_debugger.process_context_addr = input->set_shader_debugger.process_context_addr; @@ -530,11 +536,6 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - if (mes->adev->enable_uni_mes) - pipe = AMDGPU_MES_KIQ_PIPE; - else - pipe = AMDGPU_MES_SCHED_PIPE; - return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); @@ -608,6 +609,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index fa479dfa1ec1..739fce4fa8fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -365,7 +365,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; } else { - /* Disbale ASPM L1 */ + /* Disable ASPM L1 */ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; /* Disable ASPM TxL0s */ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 1251ee38a676..51e470e8d67d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* memory training timeout define */ #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 +#define regMP1_PUB_SCRATCH0 0x3b10090 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -807,6 +809,20 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) } } +static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + u32 pmfw_ver; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return false; + + /* load 4e version of sos if pmfw version less than 85.115.0 */ + pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4); + + return (pmfw_ver < 0x557300); +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -830,6 +846,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, .get_ras_capability = psp_v13_0_get_ras_capability, + .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index aa637541da58..e65194fe94af 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -710,7 +710,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); if (ring->use_pollmem) { - /*wptr polling is not enogh fast, directly clean the wptr register */ + /*wptr polling is not enough fast, directly clean the wptr register */ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index cfd8e183ad50..a8763496aed3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1080,13 +1080,16 @@ static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib, unsigned bytes = count * 8; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); + ib->ptr[ib->length_dw++] = bytes - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src); ib->ptr[ib->length_dw++] = upper_32_bits(src); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = 0; } @@ -1744,7 +1747,7 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, + .copy_pte_num_dw = 8, .copy_pte = sdma_v7_0_vm_copy_pte, .write_pte = sdma_v7_0_vm_write_pte, .set_pte_pde = sdma_v7_0_vm_set_pte_pde, diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index e4e30b9d481b..c04fdd2d5b38 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -60,7 +60,7 @@ static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *f { u32 data; - /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */ + /* CGTT_ROM_CLK_CTRL0 is not available for APUs */ if (adev->flags & AMD_IS_APU) return; diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index b0c3678cfb31..fd4c3d4f8387 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -250,13 +250,6 @@ static void soc24_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version soc24_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, .major = 1, @@ -454,6 +447,11 @@ static int soc24_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_get_irq(adev); + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -491,7 +489,7 @@ static int soc24_common_hw_init(void *handle) adev->df.funcs->hw_init(adev); /* enable the doorbell aperture */ - soc24_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } @@ -500,8 +498,13 @@ static int soc24_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - soc24_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc21_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_put_irq(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index b1fd226b7efb..9d4f5352a62c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1395,170 +1395,6 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p, - struct amdgpu_job *job) -{ - struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; - - /* if VCN0 is harvested, we can't support AV1 */ - if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) - return -EINVAL; - - scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] - [AMDGPU_RING_PRIO_0].sched; - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); - return 0; -} - -static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, - uint64_t addr) -{ - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_va_mapping *map; - uint32_t *msg, num_buffers; - struct amdgpu_bo *bo; - uint64_t start, end; - unsigned int i; - void *ptr; - int r; - - addr &= AMDGPU_GMC_HOLE_MASK; - r = amdgpu_cs_find_mapping(p, addr, &bo, &map); - if (r) { - DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); - return r; - } - - start = map->start * AMDGPU_GPU_PAGE_SIZE; - end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; - if (addr & 0x7) { - DRM_ERROR("VCN messages must be 8 byte aligned!\n"); - return -EINVAL; - } - - bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) { - DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); - return r; - } - - r = amdgpu_bo_kmap(bo, &ptr); - if (r) { - DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); - return r; - } - - msg = ptr + addr - start; - - /* Check length */ - if (msg[1] > end - addr) { - r = -EINVAL; - goto out; - } - - if (msg[3] != RDECODE_MSG_CREATE) - goto out; - - num_buffers = msg[2]; - for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { - uint32_t offset, size, *create; - - if (msg[0] != RDECODE_MESSAGE_CREATE) - continue; - - offset = msg[1]; - size = msg[2]; - - if (offset + size > end) { - r = -EINVAL; - goto out; - } - - create = ptr + addr + offset - start; - - /* H264, HEVC and VP9 can run on any instance */ - if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) - continue; - - r = vcn_v4_0_5_limit_sched(p, job); - if (r) - goto out; - } - -out: - amdgpu_bo_kunmap(bo); - return r; -} - -#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) -#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) - -#define RADEON_VCN_ENGINE_INFO (0x30000001) -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 - -#define RENCODE_ENCODE_STANDARD_AV1 2 -#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 - -/* return the offset in ib if id is found, -1 otherwise - * to speed up the searching we only search upto max_offset - */ -static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) -{ - int i; - - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { - if (ib->ptr[i + 1] == id) - return i; - } - return -1; -} - -static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - struct amdgpu_ring *ring = amdgpu_job_ring(job); - struct amdgpu_vcn_decode_buffer *decode_buffer; - uint64_t addr; - uint32_t val; - int idx; - - /* The first instance can decode anything */ - if (!ring->me) - return 0; - - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); - if (idx < 0) /* engine info is missing */ - return 0; - - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; - - if (!(decode_buffer->valid_buf_flag & 0x1)) - return 0; - - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_5_dec_msg(p, job, addr); - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) - return vcn_v4_0_5_limit_sched(p, job); - } - return 0; -} - static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1566,7 +1402,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, - .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 00350eccd571..9044bdb38cf4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -36,7 +36,6 @@ #include <linux/mman.h> #include <linux/ptrace.h> #include <linux/dma-buf.h> -#include <linux/fdtable.h> #include <linux/processor.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" @@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p) } static int criu_get_prime_handle(struct kgd_mem *mem, - int flags, u32 *shared_fd) + int flags, u32 *shared_fd, + struct file **file) { struct dma_buf *dmabuf; int ret; @@ -1846,13 +1846,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem, return ret; } - ret = dma_buf_fd(dmabuf, flags); + ret = get_unused_fd_flags(flags); if (ret < 0) { pr_err("dmabuf create fd failed, ret:%d\n", ret); goto out_free_dmabuf; } *shared_fd = ret; + *file = dmabuf->file; return 0; out_free_dmabuf: @@ -1860,6 +1861,25 @@ out_free_dmabuf: return ret; } +static void commit_files(struct file **files, + struct kfd_criu_bo_bucket *bo_buckets, + unsigned int count, + int err) +{ + while (count--) { + struct file *file = files[count]; + + if (!file) + continue; + if (err) { + fput(file); + put_unused_fd(bo_buckets[count].dmabuf_fd); + } else { + fd_install(bo_buckets[count].dmabuf_fd, file); + } + } +} + static int criu_checkpoint_bos(struct kfd_process *p, uint32_t num_bos, uint8_t __user *user_bos, @@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, { struct kfd_criu_bo_bucket *bo_buckets; struct kfd_criu_bo_priv_data *bo_privs; + struct file **files = NULL; int ret = 0, pdd_index, bo_index = 0, id; void *mem; @@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p, goto exit; } + files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL); + if (!files) { + ret = -ENOMEM; + goto exit; + } + for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { struct kfd_process_device *pdd = p->pdds[pdd_index]; struct amdgpu_bo *dumper_bo; @@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, ret = criu_get_prime_handle(kgd_mem, bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, - &bo_bucket->dmabuf_fd); + &bo_bucket->dmabuf_fd, &files[bo_index]); if (ret) goto exit; } else { @@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p, *priv_offset += num_bos * sizeof(*bo_privs); exit: - while (ret && bo_index--) { - if (bo_buckets[bo_index].alloc_flags - & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) - close_fd(bo_buckets[bo_index].dmabuf_fd); - } - + commit_files(files, bo_buckets, bo_index, ret); + kvfree(files); kvfree(bo_buckets); kvfree(bo_privs); return ret; @@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, static int criu_restore_bo(struct kfd_process *p, struct kfd_criu_bo_bucket *bo_bucket, - struct kfd_criu_bo_priv_data *bo_priv) + struct kfd_criu_bo_priv_data *bo_priv, + struct file **file) { struct kfd_process_device *pdd; struct kgd_mem *kgd_mem; @@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p, if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { ret = criu_get_prime_handle(kgd_mem, DRM_RDWR, - &bo_bucket->dmabuf_fd); + &bo_bucket->dmabuf_fd, file); if (ret) return ret; } else { @@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p, { struct kfd_criu_bo_bucket *bo_buckets = NULL; struct kfd_criu_bo_priv_data *bo_privs = NULL; + struct file **files = NULL; int ret = 0; uint32_t i = 0; @@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p, if (!bo_buckets) return -ENOMEM; + files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL); + if (!files) { + ret = -ENOMEM; + goto exit; + } + ret = copy_from_user(bo_buckets, (void __user *)args->bos, args->num_bos * sizeof(*bo_buckets)); if (ret) { @@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p, /* Create and map new BOs */ for (; i < args->num_bos; i++) { - ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]); + ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]); if (ret) { pr_debug("Failed to restore BO[%d] ret%d\n", i, ret); goto exit; @@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p, ret = -EFAULT; exit: - while (ret && i--) { - if (bo_buckets[i].alloc_flags - & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) - close_fd(bo_buckets[i].dmabuf_fd); - } + commit_files(files, bo_buckets, i, ret); + kvfree(files); kvfree(bo_buckets); kvfree(bo_privs); return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd7b81b7b939..48caecf7e72e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; i++; } /* GL1 Data Cache per SA */ @@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; i++; } /* L3 Data Cache per GPU */ @@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } return i; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 577d121cc6d1..648f40091aa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2407,10 +2407,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, pdd->sdma_past_activity_counter += sdma_val; } - list_del(&q->list); - qpd->queue_count--; if (q->properties.is_active) { decrement_queue_count(dqm, qpd, q); + q->properties.is_active = false; if (!dqm->dev->kfd->shared_resources.enable_mes) { retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, @@ -2421,6 +2420,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, retval = remove_queue_mes(dqm, q, qpd); } } + list_del(&q->list); + qpd->queue_count--; /* * Unconditionally decrement this counter, regardless of the queue's @@ -3539,6 +3540,30 @@ int debug_refresh_runlist(struct device_queue_manager *dqm) return debug_map_and_unlock(dqm); } +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off, u32 *queue_format) +{ + struct queue *q; + bool r = false; + + if (!queue_format) + return r; + + dqm_lock(dqm); + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.doorbell_off == doorbell_off) { + *queue_format = q->properties.format; + r = true; + goto out; + } + } + +out: + dqm_unlock(dqm); + return r; +} #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 08b40826ad1e..09ab36f8e8c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -324,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q, int debug_lock_and_unmap(struct device_queue_manager *dqm); int debug_map_and_unlock(struct device_queue_manager *dqm); int debug_refresh_runlist(struct device_queue_manager *dqm); +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off, u32 *queue_format); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index bb8cbfc39b90..37b69fe0ede3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -306,23 +306,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index fecdbbab9894..d46a13156ee9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -167,11 +167,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SE3SH: case SOC15_IH_CLIENTID_UTCL2: block = AMDGPU_RAS_BLOCK__GFX; - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; - else + if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x00557300 and onwards */ + if (dev->adev->pm.fw_version < 0x00557300) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x05550C00 and onwards */ + if (dev->adev->pm.fw_version < 0x05550C00) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } break; case SOC15_IH_CLIENTID_VMC: case SOC15_IH_CLIENTID_VMC1: @@ -184,11 +196,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: block = AMDGPU_RAS_BLOCK__SDMA; - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; - else + if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x00557300 and onwards */ + if (dev->adev->pm.fw_version < 0x00557300) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x05550C00 and onwards */ + if (dev->adev->pm.fw_version < 0x05550C00) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } break; default: dev_warn(dev->adev->dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index d163d92a692f..2b72d5b4949b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -341,6 +341,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_doorbell_offset = q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; + m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9ae9abc6eb43..d6530febabad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -868,6 +868,12 @@ struct svm_range_list { struct task_struct *faulting_task; /* check point ts decides if page fault recovery need be dropped */ uint64_t checkpoint_ts[MAX_GPU_INSTANCE]; + + /* Default granularity to use in buffer migration + * and restoration of backing memory while handling + * recoverable page faults + */ + uint8_t default_granularity; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a902950cc060..d07acf1b2f93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -270,6 +270,11 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) struct kfd_node *dev = NULL; struct kfd_process *proc = NULL; struct kfd_process_device *pdd = NULL; + int i; + struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; + u32 queue_format; + + memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); dev = pdd->dev; @@ -287,8 +292,29 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Collect wave count from device if it supports */ wave_cnt = 0; max_waves_per_cu = 0; - dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, - &max_waves_per_cu, 0); + + /* + * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition. + * For AQL queues, because of cooperative dispatch we multiply the wave count + * by number of XCCs in the partition to get the total wave counts across all + * XCCs in the partition. + * For PM4 queues, there is no cooperative dispatch so wave_cnt stay as it is. + */ + dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, + &max_waves_per_cu, ffs(dev->xcc_mask) - 1); + + for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { + if (cu_occupancy[i].wave_cnt != 0 && + kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, + cu_occupancy[i].doorbell_off, + &queue_format)) { + if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4)) + wave_cnt += cu_occupancy[i].wave_cnt; + else + wave_cnt += (NUM_XCC(dev->xcc_mask) * + cu_occupancy[i].wave_cnt); + } + } /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 20ea745729ee..01b960b15274 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -517,7 +517,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval) goto err_destroy_queue; - kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { @@ -527,6 +526,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + kfd_procfs_del_queue(pqn->q); kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); @@ -1046,6 +1046,7 @@ exit: pr_debug("Queue id %d was restored successfully\n", queue_id); kfree(q_data); + kfree(q_extra_data); return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ea6a8e43bd5b..de8b9abf7afc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -235,17 +235,16 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset, amdgpu_reset_get_desc(reset_context, reset_cause, sizeof(reset_cause)); - kfd_smi_event_add(0, dev, event, "%x %s\n", - dev->reset_seq_num, - reset_cause); + kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET( + dev->reset_seq_num, reset_cause)); } void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, uint64_t throttle_bitmask) { - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING( throttle_bitmask, - amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); + amdgpu_dpm_get_thermal_throttling_counter(dev->adev))); } void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) @@ -256,8 +255,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) if (task_info) { /* Report VM faults from user applications, not retry from kernel */ if (task_info->pid) - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", - task_info->pid, task_info->task_name); + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT( + task_info->pid, task_info->task_name)); amdgpu_vm_put_task_info(task_info); } } @@ -267,16 +266,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid, ktime_t ts) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START, - "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid, - address, node->id, write_fault ? 'W' : 'R'); + KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid, + address, node->id, write_fault ? 'W' : 'R')); } void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid, unsigned long address, bool migration) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END, - "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(), - pid, address, node->id, migration ? 'M' : 'U'); + KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(), + pid, address, node->id, migration ? 'M' : 'U')); } void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, @@ -286,9 +285,9 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START, - "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", + KFD_EVENT_FMT_MIGRATE_START( ktime_get_boottime_ns(), pid, start, end - start, - from, to, prefetch_loc, preferred_loc, trigger); + from, to, prefetch_loc, preferred_loc, trigger)); } void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, @@ -296,24 +295,24 @@ void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, uint32_t from, uint32_t to, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END, - "%lld -%d @%lx(%lx) %x->%x %d\n", + KFD_EVENT_FMT_MIGRATE_END( ktime_get_boottime_ns(), pid, start, end - start, - from, to, trigger); + from, to, trigger)); } void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION, - "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid, - node->id, trigger); + KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid, + node->id, trigger)); } void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE, - "%lld -%d %x\n", ktime_get_boottime_ns(), pid, - node->id); + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid, + node->id, 0)); } void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) @@ -330,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) kfd_smi_event_add(p->lead_thread->pid, pdd->dev, KFD_SMI_EVENT_QUEUE_RESTORE, - "%lld -%d %x %c\n", ktime_get_boottime_ns(), - p->lead_thread->pid, pdd->dev->id, 'R'); + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), + p->lead_thread->pid, pdd->dev->id, 'R')); } kfd_unref_process(p); } @@ -341,8 +340,8 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU, - "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(), - pid, address, last - address + 1, node->id, trigger); + KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(), + pid, address, last - address + 1, node->id, trigger)); } int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 40c94c4cdd96..04e746923697 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -309,12 +309,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) } static void -svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc, - uint8_t *granularity, uint32_t *flags) +svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location, + int32_t *prefetch_loc, uint8_t *granularity, + uint32_t *flags) { *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; - *granularity = 9; + *granularity = svms->default_granularity; *flags = KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT; } @@ -358,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, bitmap_copy(prange->bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); - svm_range_set_default_attributes(&prange->preferred_loc, + svm_range_set_default_attributes(svms, &prange->preferred_loc, &prange->prefetch_loc, &prange->granularity, &prange->flags); @@ -2703,9 +2704,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma); start_limit = max(vma->vm_start >> PAGE_SHIFT, - (unsigned long)ALIGN_DOWN(addr, 2UL << 8)); + (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity)); end_limit = min(vma->vm_end >> PAGE_SHIFT, - (unsigned long)ALIGN(addr + 1, 2UL << 8)); + (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity)); + /* First range that starts after the fault address */ node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX); if (node) { @@ -3249,6 +3251,12 @@ int svm_range_list_init(struct kfd_process *p) if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev)) bitmap_set(svms->bitmap_supported, i, 1); + /* Value of default granularity cannot exceed 0x1B, the + * number of pages supported by a 4-level paging table + */ + svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B); + pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity); + return 0; } @@ -3776,7 +3784,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm, node = interval_tree_iter_first(&svms->objects, start, last); if (!node) { pr_debug("range attrs not found return default values\n"); - svm_range_set_default_attributes(&location, &prefetch_loc, + svm_range_set_default_attributes(svms, &location, &prefetch_loc, &granularity, &flags_and); flags_or = flags_and; if (p->xnack_enabled) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a8d0d1b71723..60c617fcc97e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -770,6 +770,12 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, return; } + /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */ + if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) { + DRM_INFO("Skip DMUB HPD IRQ callback in suspend/resume\n"); + return; + } + link_index = notify->link_index; link = adev->dm.dc->links[link_index]; dev = adev->dm.ddev; @@ -808,6 +814,20 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, } /** + * dmub_hpd_sense_callback - DMUB HPD sense processing callback. + * @adev: amdgpu_device pointer + * @notify: dmub notification structure + * + * HPD sense changes can occur during low power states and need to be + * notified from firmware to driver. + */ +static void dmub_hpd_sense_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) +{ + DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n"); +} + +/** * register_dmub_notify_callback - Sets callback for DMUB notify * @adev: amdgpu_device pointer * @type: Type of dmub notification @@ -1757,25 +1777,41 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * static enum dmub_ips_disable_type dm_get_default_ips_mode( struct amdgpu_device *adev) { - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) && - (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE; - if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) - return DMUB_IPS_ENABLE; + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 5, 0): + /* + * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to + * cause a hard hang. A fix exists for newer PMFW. + * + * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest + * IPS state in all cases, except for s0ix and all displays off (DPMS), + * where IPS2 is allowed. + * + * When checking pmfw version, use the major and minor only. + */ + if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) + /* + * Other ASICs with DCN35 that have residency issues with + * IPS2 in idle. + * We want them to use IPS2 only in display off cases. + */ + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + break; + case IP_VERSION(3, 5, 1): + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + break; + default: + /* ASICs older than DCN35 do not have IPSs */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0)) + ret = DMUB_IPS_DISABLE_ALL; + break; + } - /* ASICs older than DCN35 do not have IPSs */ - return DMUB_IPS_DISABLE_ALL; + return ret; } static int amdgpu_dm_init(struct amdgpu_device *adev) @@ -1996,7 +2032,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); } - if (adev->dm.dc->caps.ips_support && adev->dm.dc->config.disable_ips == DMUB_IPS_ENABLE) + if (adev->dm.dc->caps.ips_support && + adev->dm.dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) adev->dm.idle_workqueue = idle_create_workqueue(adev); if (adev->dm.dc->caps.max_links > 0 && adev->family >= AMDGPU_FAMILY_RV) { @@ -3808,6 +3845,12 @@ static int register_hpd_handlers(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub hpd callback"); return -EINVAL; } + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, + dmub_hpd_sense_callback, true)) { + DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); + return -EINVAL; + } } list_for_each_entry(connector, @@ -4449,6 +4492,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) #define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 +#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2) #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, @@ -4463,6 +4507,21 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, return; amdgpu_acpi_get_backlight_caps(&caps); + + /* validate the firmware value is sane */ + if (caps.caps_valid) { + int spread = caps.max_input_signal - caps.min_input_signal; + + if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + caps.min_input_signal < 0 || + spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + spread < AMDGPU_DM_MIN_SPREAD) { + DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n", + caps.min_input_signal, caps.max_input_signal); + caps.caps_valid = false; + } + } + if (caps.caps_valid) { dm->backlight_caps[bl_idx].caps_valid = true; if (caps.aux_support) @@ -6683,12 +6742,21 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || stream->signal == SIGNAL_TYPE_EDP) { + const struct dc_edid_caps *edid_caps; + unsigned int disable_colorimetry = 0; + + if (aconnector->dc_sink) { + edid_caps = &aconnector->dc_sink->edid_caps; + disable_colorimetry = edid_caps->panel_patch.disable_colorimetry; + } + // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet // stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && - stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED; + stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED && + !disable_colorimetry; if (stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; @@ -10610,7 +10678,7 @@ static bool should_reset_plane(struct drm_atomic_state *state, * TODO: We can likely skip bandwidth validation if the only thing that * changed about the plane was it'z z-ordering. */ - if (new_crtc_state->zpos_changed) + if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos) return true; if (drm_atomic_crtc_needs_modeset(new_crtc_state)) @@ -11458,6 +11526,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, drm_dbg(dev, "Failed to determine cursor mode\n"); goto fail; } + + /* + * If overlay cursor is needed, DC cannot go through the + * native cursor update path. All enabled planes on the CRTC + * need to be added for DC to not disable a plane by mistake + */ + if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) { + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto fail; + } } /* Remove exiting planes if they are modified */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2d7755e2b6c3..15d4690c74d6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,7 +50,7 @@ #define AMDGPU_DM_MAX_NUM_EDP 2 -#define AMDGPU_DMUB_NOTIFICATION_MAX 6 +#define AMDGPU_DMUB_NOTIFICATION_MAX 7 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 50109d13d967..069e0195e50a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -73,6 +73,10 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + DRM_DEBUG_DRIVER("Disabling VSC on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.disable_colorimetry = true; + break; default: return; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6b5eed37532b..a08e8a0b696c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1027,6 +1027,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int remaining_to_try = 0; int ret; uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + int var_pbn; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1057,13 +1058,18 @@ static int try_disable_dsc(struct drm_atomic_state *state, break; DRM_DEBUG_DRIVER("MST_DSC index #%d, try no compression\n", next_index); + var_pbn = vars[next_index].pbn; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, vars[next_index].pbn); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n", + __func__, __LINE__, next_index, ret); + vars[next_index].pbn = var_pbn; return ret; + } ret = drm_dp_mst_atomic_check(state); if (ret == 0) { @@ -1071,14 +1077,17 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - DRM_DEBUG_DRIVER("MST_DSC index #%d, restore minimum compression\n", next_index); - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000); + DRM_DEBUG_DRIVER("MST_DSC index #%d, restore optimized pbn value\n", next_index); + vars[next_index].pbn = var_pbn; ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, vars[next_index].pbn); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n", + __func__, __LINE__, next_index, ret); return ret; + } } tried[next_index] = true; @@ -1147,7 +1156,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel; params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported; - dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); if (!dc_dsc_compute_bandwidth_range( stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, @@ -1325,7 +1334,7 @@ static bool is_dsc_need_re_compute( if (new_crtc_state->enable && new_crtc_state->active) { if (new_crtc_state->mode_changed || new_crtc_state->active_changed || new_crtc_state->connectors_changed) { - DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompte required." + DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required." "stream 0x%p in new dc_state\n", __func__, __LINE__, stream); is_dsc_need_re_compute = true; @@ -1681,7 +1690,7 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, { struct dc_dsc_policy dsc_policy = {0}; - dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, dsc_policy.min_target_bpp * 16, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 25f63b2e7a8e..495e3cd70426 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -961,6 +961,7 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, else domain = AMDGPU_GEM_DOMAIN_VRAM; + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 08c494a7a21b..0d5fefb0f591 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -114,6 +114,7 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector domain = amdgpu_display_supported_domains(adev, rbo->flags); + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index e47e9db062f4..681799468487 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -569,7 +569,7 @@ static void calculate_bandwidth( break; } data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1)); - /*clamp the partitions to the maxium number supported by the lb*/ + /* clamp the partitions to the maximum number supported by the lb */ if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) { data->lb_partitions_max[i] = bw_int_to_fixed(10); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index f770828df149..0e243f4344d0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -59,6 +59,7 @@ int clk_mgr_helper_get_active_display_cnt( display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; + const struct dc_stream_status *stream_status = &context->stream_status[i]; /* Don't count SubVP phantom pipes as part of active * display count @@ -66,13 +67,7 @@ int clk_mgr_helper_get_active_display_cnt( if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. - */ - if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL) + if (!stream->dpms_off || (stream_status && stream_status->plane_count)) display_count++; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 97164b5585a8..b46a3afe48ca 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -1222,6 +1222,12 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_dpp_power_gate = false; ctx->dc->debug.disable_hubp_power_gate = false; ctx->dc->debug.disable_dsc_power_gate = false; + + /* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */ + if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE && + ctx->dce_version == DCN_VERSION_3_5 && + ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00)) + ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; } else { /*let's reset the config control flag*/ ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ae788154896c..5c39390ecbd5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1767,7 +1767,7 @@ bool dc_validate_boot_timing(const struct dc *dc, if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false; - if (!se->funcs->dp_get_pixel_format) + if (!se || !se->funcs->dp_get_pixel_format) return false; if (!se->funcs->dp_get_pixel_format( @@ -2376,7 +2376,7 @@ static bool is_surface_in_context( return false; } -static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) +static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; enum surface_update_type update_type = UPDATE_TYPE_FAST; @@ -2455,7 +2455,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa /* todo: below are HW dependent, we should add a hook to * DCE/N resource and validated there. */ - if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) { + if (!dc->debug.skip_full_updated_if_possible) { /* swizzled mode requires RQ to be setup properly, * thus need to run DML to calculate RQ settings */ @@ -2547,7 +2547,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc, update_flags->raw = 0; // Reset all flags - type = get_plane_info_update_type(u); + type = get_plane_info_update_type(dc, u); elevate_update_type(&overall_type, type); type = get_scaling_info_update_type(dc, u); @@ -2596,6 +2596,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc, elevate_update_type(&overall_type, UPDATE_TYPE_MED); } + if (u->sdr_white_level_nits) + if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) { + update_flags->bits.sdr_white_level_nits = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL); + } + if (u->cm2_params) { if ((u->cm2_params->component_settings.shaper_3dlut_setting != u->surface->mcm_shaper_3dlut_setting) @@ -2876,6 +2882,10 @@ static void copy_surface_update_to_plane( surface->hdr_mult = srf_update->hdr_mult; + if (srf_update->sdr_white_level_nits) + surface->sdr_white_level_nits = + srf_update->sdr_white_level_nits; + if (srf_update->blend_tf) memcpy(&surface->blend_tf, srf_update->blend_tf, sizeof(surface->blend_tf)); @@ -4679,6 +4689,8 @@ static bool full_update_required(struct dc *dc, srf_updates[i].scaling_info || (srf_updates[i].hdr_mult.value && srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) || + (srf_updates[i].sdr_white_level_nits && + srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) || srf_updates[i].in_transfer_func || srf_updates[i].func_shaper || srf_updates[i].lut3d_func || @@ -5744,6 +5756,27 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, } /** + * dc_process_dmub_dpia_set_tps_notification - Submits tps notification + * + * @dc: [in] dc structure + * @link_index: [in] link index + * @tps: [in] request tps + * + * Submits set_tps_notification command to dmub via inbox message + */ +void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps) +{ + union dmub_rb_cmd cmd = {0}; + + cmd.set_tps_notification.header.type = DMUB_CMD__DPIA; + cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION; + cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst; + cmd.set_tps_notification.tps_notification.tps = tps; + + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + +/** * dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption * * @dc: [in] dc structure diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4c94dd38be4b..3992ad73165b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.299" +#define DC_VER "3.2.301" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -462,6 +462,7 @@ struct dc_config { bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; bool disable_hbr_audio_dp2; + bool consolidated_dpia_dp_lt; }; enum visual_confirm { @@ -762,7 +763,8 @@ union dpia_debug_options { uint32_t disable_mst_dsc_work_around:1; /* bit 3 */ uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ - uint32_t reserved:26; + uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ + uint32_t reserved:25; } bits; uint32_t raw; }; @@ -1056,6 +1058,9 @@ struct dc_debug_options { unsigned int force_lls; bool notify_dpia_hr_bw; bool enable_ips_visual_confirm; + unsigned int sharpen_policy; + unsigned int scale_to_sharpness_policy; + bool skip_full_updated_if_possible; }; @@ -1269,6 +1274,7 @@ union surface_update_flags { uint32_t tmz_changed:1; uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */ uint32_t full_update:1; + uint32_t sdr_white_level_nits:1; } bits; uint32_t raw; @@ -1351,6 +1357,7 @@ struct dc_plane_state { bool adaptive_sharpness_en; int sharpness_level; enum linear_light_scaling linear_light_scaling; + unsigned int sdr_white_level_nits; }; struct dc_plane_info { @@ -1508,6 +1515,7 @@ struct dc_surface_update { */ struct dc_cm2_parameters *cm2_params; const struct dc_csc_transform *cursor_csc_color_matrix; + unsigned int sdr_white_level_nits; }; /* @@ -2520,6 +2528,8 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, uint8_t mst_alloc_slots, uint8_t *mst_slots_in_use); +void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps); + void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, uint32_t hpd_int_enable); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 519c3df78ee5..41bd95e9177a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -969,6 +969,14 @@ union dp_sink_video_fallback_formats { uint8_t raw; }; +union dpcd_max_uncompressed_pixel_rate_cap { + struct { + uint16_t max_uncompressed_pixel_rate_cap :15; + uint16_t valid :1; + } bits; + uint8_t raw[2]; +}; + union dp_fec_capability1 { struct { uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1; @@ -1170,6 +1178,7 @@ struct dpcd_caps { struct dc_lttpr_caps lttpr_caps; struct adaptive_sync_caps adaptive_sync_caps; struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info; + union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap; union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates; union dp_main_line_channel_coding_cap channel_coding_cap; @@ -1340,6 +1349,9 @@ struct dp_trace { #ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX #define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110 #endif +#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP +#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c +#endif #ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index fe3078b8789e..9014c2409817 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -59,6 +59,7 @@ struct dc_dsc_config_options { uint32_t max_target_bpp_limit_override_x16; uint32_t slice_height_granularity; uint32_t dsc_force_odm_hslice_override; + bool force_dsc_when_not_needed; }; bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, @@ -100,7 +101,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( */ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy); + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding); void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index cd6de93eb91c..603552dbd771 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -186,19 +186,17 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; + + spl_in->debug.sharpen_policy = (enum sharpen_policy)pipe_ctx->stream->ctx->dc->debug.sharpen_policy; + spl_in->debug.scale_to_sharpness_policy = + (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy; + /* Check if it is stream is in fullscreen and if its HDR. * Use this to determine sharpness levels */ spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); - spl_in->hdr_multx100 = 0; - if (spl_in->is_hdr_on) { - spl_in->hdr_multx100 = (uint32_t)dc_fixpt_floor(dc_fixpt_mul(plane_state->hdr_mult, - dc_fixpt_from_int(100))); - /* Disable sharpness for HDR Mult > 6.0 */ - if (spl_in->hdr_multx100 > 600) - spl_in->adaptive_sharpness.enable = false; - } + spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits; } /// @brief Translate SPL output parameters to pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index fd6dca735714..6d7989b751e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -178,6 +178,7 @@ struct dc_panel_patch { unsigned int skip_avmute; unsigned int mst_start_top_delay; unsigned int remove_sink_ext_caps; + unsigned int disable_colorimetry; }; struct dc_edid_caps { diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index ee02b78e290f..838d72eaa87f 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -1748,10 +1748,6 @@ void dccg35_init(struct dccg *dccg) dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false); } - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - for (otg_inst = 0; otg_inst < 4; otg_inst++) - dccg35_set_dppclk_root_clock_gating(dccg, otg_inst, 0); - /* dccg35_enable_global_fgcg_rep( dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits @@ -1932,47 +1928,32 @@ static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, } /*get other front end connected to this backend*/ -static uint8_t dccg35_get_other_enabled_symclk_fe(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) +static uint8_t dccg35_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst) { uint8_t num_enabled_symclk_fe = 0; - uint32_t be_clk_en = 0, fe_clk_en[5] = {0}, be_clk_sel[5] = {0}; + uint32_t fe_clk_en[5] = {0}, be_clk_sel[5] = {0}; struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - switch (link_enc_inst) { - case 0: - REG_GET_3(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, &be_clk_en, - SYMCLKA_FE_EN, &fe_clk_en[0], - SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]); - break; - case 1: - REG_GET_3(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, &be_clk_en, - SYMCLKB_FE_EN, &fe_clk_en[1], - SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]); - break; - case 2: - REG_GET_3(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, &be_clk_en, - SYMCLKC_FE_EN, &fe_clk_en[2], - SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]); - break; - case 3: - REG_GET_3(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, &be_clk_en, - SYMCLKD_FE_EN, &fe_clk_en[3], - SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]); - break; - case 4: - REG_GET_3(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, &be_clk_en, - SYMCLKE_FE_EN, &fe_clk_en[4], - SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]); - break; - } - if (be_clk_en) { - /* for DPMST, this backend could be used by multiple front end. - only disable the backend if this stream_enc_ins is the last active stream enc connected to this back_end*/ - uint8_t i; - for (i = 0; i != link_enc_inst && i < ARRAY_SIZE(fe_clk_en); i++) { - if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst) - num_enabled_symclk_fe++; - } + REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0], + SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]); + + REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1], + SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]); + + REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2], + SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]); + + REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3], + SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]); + + REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, &fe_clk_en[4], + SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]); + + uint8_t i; + + for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) { + if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst) + num_enabled_symclk_fe++; } return num_enabled_symclk_fe; } @@ -2020,9 +2001,9 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst break; } - /*check other enabled symclk fe */ - num_enabled_symclk_fe = dccg35_get_other_enabled_symclk_fe(dccg, stream_enc_inst, link_enc_inst); - /*only turn off backend clk if other front end attachecd to this backend are all off, + /*check other enabled symclk fe connected to this be */ + num_enabled_symclk_fe = dccg35_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst); + /*only turn off backend clk if other front end attached to this backend are all off, for mst, only turn off the backend if this is the last front end*/ if (num_enabled_symclk_fe == 0) { switch (link_enc_inst) { @@ -2351,6 +2332,14 @@ static void dccg35_disable_symclk_se_cb( /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ } +void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating) +{ + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) { + dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); + } +} + static const struct dccg_funcs dccg35_funcs_new = { .update_dpp_dto = dccg35_update_dpp_dto_cb, .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, @@ -2411,7 +2400,7 @@ static const struct dccg_funcs dccg35_funcs = { .enable_symclk_se = dccg35_enable_symclk_se, .disable_symclk_se = dccg35_disable_symclk_se, .set_dtbclk_p_src = dccg35_set_dtbclk_p_src, - + .dccg_root_gate_disable_control = dccg35_root_gate_disable_control, }; struct dccg *dccg35_create( diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h index 1586a45ca3bd..51f98c5c51c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h @@ -241,6 +241,7 @@ struct dccg *dccg35_create( void dccg35_init(struct dccg *dccg); void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value); +void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating); #endif //__DCN35_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index e7019c95ba79..4fce64a030b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib, if (swath_height_c > 0) log2_swath_height_c = dml_log2(swath_height_c); - - if (req128_c && log2_swath_height_c > 0) - log2_swath_height_c -= 1; } rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index ae5251041728..3fa9a5da02f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib, if (swath_height_c > 0) log2_swath_height_c = dml_log2(swath_height_c); - - if (req128_c && log2_swath_height_c > 0) - log2_swath_height_c -= 1; } rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 0b132ce1d2cd..2b275e680379 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -1924,15 +1924,6 @@ static unsigned int CalculateVMAndRowBytes( *PixelPTEReqWidth = 32768.0 / BytePerPixel; *PTERequestSize = 64; FractionOfPTEReturnDrop = 0; - } else if (MacroTileSizeBytes == 4096) { - PixelPTEReqHeightPTEs = 1; - *PixelPTEReqHeight = MacroTileHeight; - *PixelPTEReqWidth = 8 * *MacroTileWidth; - *PTERequestSize = 64; - if (ScanDirection != dm_vert) - FractionOfPTEReturnDrop = 0; - else - FractionOfPTEReturnDrop = 7.0 / 8; } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { PixelPTEReqHeightPTEs = 16; *PixelPTEReqHeight = 16 * BlockHeight256Bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 547dfcc80fde..d851c081e376 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -8926,7 +8926,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature - // if possible, then will try to program for the best power saving features in order of diffculty (dram, fclk, stutter) + // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter) s->iteration = 0; s->MaxTotalRDBandwidth = 0; s->AllPrefetchModeTested = false; @@ -9977,7 +9977,7 @@ void dml_core_get_row_heights( dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); #endif - // just suppluy with enough parameters to calculate meta and dte + // just supply with enough parameters to calculate meta and dte CalculateVMAndRowBytes( 0, // dml_bool_t ViewportStationary, 1, // dml_bool_t DCCEnable, @@ -10110,7 +10110,7 @@ dml_bool_t dml_mode_support( /// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK /// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values. /// @param state_idx Power state idx chosen -/// @param display_cfg Display Congiuration +/// @param display_cfg Display Configuration /// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming /// TODO: Add clk_cfg input, could be useful for standalone mode dml_bool_t dml_mode_programming( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b0d9aed0f265..8697eac1e1f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -858,7 +858,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->immediate_flip = plane_state->flip_immediate; - plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable; + plane->composition.rect_out_height_spans_vactive = + plane_state->dst_rect.height >= stream->timing.v_addressable && + stream->dst.height >= stream->timing.v_addressable; } //TODO : Could be possibly moved to a common helper layer. diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index d63558ee3135..1cf9015e854a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -940,9 +940,11 @@ static void build_synchronized_timing_groups( /* find synchronizable timing groups */ for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, - &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0 && - display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) { + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0 && + display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder && + (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match + display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c index c4c52173ef22..11c904ae2958 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c @@ -303,7 +303,6 @@ void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_m if (project == dml_project_dcn35 || project == dml_project_dcn351) { policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; - policy->EnhancedPrefetchScheduleAccelerationFinal = 0; policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ policy->UseOnlyMaxPrefetchModes = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 01f98139292e..5105fd580017 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -951,6 +951,8 @@ static void dpp401_dscl_set_isharp_filter( * * @dpp_base: High level DPP struct * @scl_data: scalaer_data info + * @program_isharp_1dlut: flag to program isharp 1D LUT + * @bs_coeffs_updated: Blur and Scale Coefficients update flag * * This is the primary function to program isharp * diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index a1727e5bf024..ebd5df1a36e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -668,6 +668,7 @@ static bool decide_dsc_bandwidth_range( */ static bool decide_dsc_target_bpp_x16( const struct dc_dsc_policy *policy, + const struct dc_dsc_config_options *options, const struct dsc_enc_caps *dsc_common_caps, const int target_bandwidth_kbps, const struct dc_crtc_timing *timing, @@ -682,7 +683,7 @@ static bool decide_dsc_target_bpp_x16( if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16, num_slices_h, dsc_common_caps, timing, link_encoding, &range)) { if (target_bandwidth_kbps >= range.stream_kbps) { - if (policy->enable_dsc_when_not_needed) + if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed) /* enable max bpp even dsc is not needed */ *target_bpp_x16 = range.max_target_bpp_x16; } else if (target_bandwidth_kbps >= range.max_kbps) { @@ -882,7 +883,7 @@ static bool setup_dsc_config( memset(dsc_cfg, 0, sizeof(struct dc_dsc_config)); - dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy); + dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding); pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right; pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; @@ -1080,6 +1081,7 @@ static bool setup_dsc_config( if (target_bandwidth_kbps > 0) { is_dsc_possible = decide_dsc_target_bpp_x16( &policy, + options, &dsc_common_caps, target_bandwidth_kbps, timing, @@ -1171,7 +1173,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy) + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding) { uint32_t bpc = 0; @@ -1235,10 +1238,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16; /* enable DSC when not needed, default false */ - if (dsc_policy_enable_dsc_when_not_needed) - policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; - else - policy->enable_dsc_when_not_needed = false; + policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; } void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit) @@ -1267,4 +1267,5 @@ void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_ options->dsc_force_odm_hslice_override = dc->debug.force_odm_combine; options->max_target_bpp_limit_override_x16 = 0; options->slice_height_granularity = 1; + options->force_dsc_when_not_needed = false; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 6293173ba2b9..5eb3da8d5206 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -545,6 +545,7 @@ static void hubbub35_init(struct hubbub *hubbub) DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256); + memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate)); } /*static void hubbub35_set_request_limit(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index d52ce58c6a98..4fbed0298adf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -57,6 +57,7 @@ #include "panel_cntl.h" #include "dc_state_priv.h" #include "dpcd_defs.h" +#include "dsc.h" /* include DCE11 register header files */ #include "dce/dce_11_0_d.h" #include "dce/dce_11_0_sh_mask.h" @@ -1823,6 +1824,48 @@ static void get_edp_links_with_sink( } } +static void clean_up_dsc_blocks(struct dc *dc) +{ + struct display_stream_compressor *dsc = NULL; + struct timing_generator *tg = NULL; + struct stream_encoder *se = NULL; + struct dccg *dccg = dc->res_pool->dccg; + struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; + int i; + + if (dc->ctx->dce_version != DCN_VERSION_3_5 && + dc->ctx->dce_version != DCN_VERSION_3_51) + return; + + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { + struct dcn_dsc_state s = {0}; + + dsc = dc->res_pool->dscs[i]; + dsc->funcs->dsc_read_state(dsc, &s); + if (s.dsc_fw_en) { + /* disable DSC in OPTC */ + if (i < dc->res_pool->timing_generator_count) { + tg = dc->res_pool->timing_generators[i]; + tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0); + } + /* disable DSC in stream encoder */ + if (i < dc->res_pool->stream_enc_count) { + se = dc->res_pool->stream_enc[i]; + se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0); + se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true); + } + /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, dsc->inst); + dsc->funcs->dsc_disable(dsc); + + /* power down DSC */ + if (pg_cntl != NULL) + pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false); + } + } +} + /* * When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need: * 1. Power down all DC HW blocks @@ -1927,6 +1970,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); + + /* DSC could be enabled on eDP during VBIOS post. + * To clean up dsc blocks if eDP is in link but not active. + */ + if (edp_link_with_sink && (edp_stream_num == 0)) + clean_up_dsc_blocks(dc); + disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); @@ -2046,13 +2096,20 @@ static void set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); - - if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); + } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 01dffed4d30b..a6a1db5ba8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3212,15 +3212,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 42c52284a868..bded33575493 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -455,7 +455,7 @@ bool dcn30_mmhubbub_warmup( struct mcif_wb *mcif_wb; struct mcif_warmup_params warmup_params = {0}; unsigned int i, i_buf; - /*make sure there is no active DWB eanbled */ + /* make sure there is no active DWB enabled */ for (i = 0; i < num_dwb; i++) { dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst]; if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index a36e11606f90..2e8c9f738259 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1032,6 +1032,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index a4c6decee0f8..bd309dbdf7b2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -240,6 +240,10 @@ void dcn35_init_hw(struct dc *dc) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } + if (res_pool->dccg->funcs->dccg_root_gate_disable_control) { + for (i = 0; i < res_pool->pipe_count; i++) + res_pool->dccg->funcs->dccg_root_gate_disable_control(res_pool->dccg, i, 0); + } for (i = 0; i < res_pool->audio_count; i++) { struct audio *audio = res_pool->audios[i]; @@ -330,7 +334,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; @@ -1414,7 +1431,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) { struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; struct dc *dc = pipe_ctx[i]->stream->ctx->dc; @@ -1426,14 +1449,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, num_frames = 2 * (frame_rate % 60); } } - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index d619eb229a62..e94e9ba60f55 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -213,6 +213,7 @@ struct dccg_funcs { uint32_t otg_inst); void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); + void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating); }; #endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c index 46fb3649bc86..6499807af72a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c @@ -50,8 +50,31 @@ static void update_dpia_stream_allocation_table(struct dc_link *link, DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", status, mst_alloc_slots, prev_mst_slots_in_use); - ASSERT(link_enc); - link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); + if (link_enc) + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} + +static void set_dio_dpia_link_test_pattern(struct dc_link *link, + const struct link_resource *link_res, + struct encoder_set_dp_phy_pattern_param *tp_params) +{ + if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_VIDEO_MODE) + return; + + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (!link_enc) + return; + + link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params); + link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN); +} + +static void set_dio_dpia_lane_settings(struct dc_link *link, + const struct link_resource *link_res, + const struct dc_link_settings *link_settings, + const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]) +{ } static const struct link_hwss dpia_link_hwss = { @@ -65,8 +88,8 @@ static const struct link_hwss dpia_link_hwss = { .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, - .set_dp_link_test_pattern = set_dio_dp_link_test_pattern, - .set_dp_lane_settings = set_dio_dp_lane_settings, + .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern, + .set_dp_lane_settings = set_dio_dpia_lane_settings, .update_stream_allocation_table = update_dpia_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 1aed55b0ab6a..60f15a9ba7a5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -287,6 +287,13 @@ static bool dp_validate_mode_timing( req_bw = dc_bandwidth_in_kbps_from_timing(timing, dc_link_get_highest_encoding_format(link)); max_bw = dp_link_bandwidth_kbps(link, link_setting); + bool is_max_uncompressed_pixel_rate_exceeded = link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.valid && + timing->pix_clk_100hz > link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.max_uncompressed_pixel_rate_cap * 10000; + + if (is_max_uncompressed_pixel_rate_exceeded && !timing->flags.DSC) { + return false; + } + if (req_bw <= max_bw) { /* remember the biggest mode here, during * initial link training (to get diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 34a618a7278b..d78c8ec4de79 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1942,6 +1942,11 @@ static bool retrieve_link_cap(struct dc_link *link) DC_LOG_DP2("\tFEC aggregated error counters are supported"); } + core_link_read_dpcd(link, + DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP, + link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw, + sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw)); + retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 988999c44475..27b881f947e8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -515,6 +515,41 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status) return align_status.bits.INTERLANE_ALIGN_DONE == 1; } +bool dp_check_interlane_aligned(union lane_align_status_updated align_status, + struct dc_link *link, + uint8_t retries) +{ + /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4 + * has to share encoders unlike DP and USBC + */ + return (dp_is_interlane_aligned(align_status) || + (link->skip_fallback_on_link_loss && retries)); +} + +uint32_t dp_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t offset, + uint8_t retries) +{ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (offset == 0 && retries == 1 && lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) + return max(lt_settings->eq_pattern_time, (uint32_t) DPIA_CLK_SYNC_DELAY); + else + return dpia_get_eq_aux_rd_interval(link, lt_settings, offset); + } else if (is_repeater(lt_settings, offset)) + return dp_translate_training_aux_read_interval( + link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + else + return lt_settings->eq_pattern_time; +} + +bool dp_check_dpcd_reqeust_status(const struct dc_link *link, + enum dc_status status) +{ + return (status != DC_OK && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA); +} + enum link_training_result dp_check_link_loss_status( struct dc_link *link, const struct link_training_settings *link_training_setting) @@ -973,13 +1008,17 @@ void repeater_training_done(struct dc_link *link, uint32_t offset) dpcd_pattern.v1_4.TRAINING_PATTERN_SET); } -static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding) +static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding) { + enum dc_status status; uint8_t sink_status = 0; uint8_t i; /* clear training pattern set */ - dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; if (encoding == DP_128b_132b_ENCODING) { /* poll for intra-hop disable */ @@ -990,6 +1029,8 @@ static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding fsleep(1000); } } + + return LINK_TRAINING_SUCCESS; } enum dc_status dpcd_configure_channel_coding(struct dc_link *link, @@ -1013,17 +1054,18 @@ enum dc_status dpcd_configure_channel_coding(struct dc_link *link, return status; } -void dpcd_set_training_pattern( +enum dc_status dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern) { + enum dc_status status; union dpcd_training_pattern dpcd_pattern = {0}; dpcd_pattern.v1_4.TRAINING_PATTERN_SET = dp_training_pattern_to_dpcd_training_pattern( link, training_pattern); - core_link_write_dpcd( + status = core_link_write_dpcd( link, DP_TRAINING_PATTERN_SET, &dpcd_pattern.raw, @@ -1033,6 +1075,8 @@ void dpcd_set_training_pattern( __func__, DP_TRAINING_PATTERN_SET, dpcd_pattern.v1_4.TRAINING_PATTERN_SET); + + return status; } enum dc_status dpcd_set_link_settings( @@ -1185,6 +1229,13 @@ void dpcd_set_lt_pattern_and_lane_settings( dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET] = dpcd_pattern.raw; + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + dpia_set_tps_notification( + link, + lt_settings, + dpcd_pattern.v1_4.TRAINING_PATTERN_SET, + offset); + if (is_repeater(lt_settings, offset)) { DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n", __func__, @@ -1455,7 +1506,8 @@ static enum link_training_result dp_transition_to_video_idle( */ if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) { msleep(5); - status = dp_check_link_loss_status(link, lt_settings); + if (!link->skip_fallback_on_link_loss) + status = dp_check_link_loss_status(link, lt_settings); } return status; } @@ -1521,7 +1573,9 @@ enum link_training_result dp_perform_link_training( ASSERT(0); /* exit training mode */ - dpcd_exit_training_mode(link, encoding); + if ((dpcd_exit_training_mode(link, encoding) != LINK_TRAINING_SUCCESS || status == LINK_TRAINING_ABORT) && + link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + dpia_training_abort(link, <_settings, 0); /* switch to video idle */ if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) @@ -1599,8 +1653,7 @@ bool perform_link_training_with_retries( dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings); return true; } else { - /** @todo Consolidate USB4 DP and DPx.x training. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (!link->dc->config.consolidated_dpia_dp_lt && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { status = dpia_perform_link_training( link, &pipe_ctx->link_res, @@ -1629,8 +1682,17 @@ bool perform_link_training_with_retries( dp_trace_lt_total_count_increment(link, false); dp_trace_lt_result_update(link, status, false); dp_trace_set_lt_end_timestamp(link, false); - if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) { + // Update verified link settings to current one + // Because DPIA LT might fallback to lower link setting. + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + link->verified_link_cap.link_rate = link->cur_link_settings.link_rate; + link->verified_link_cap.lane_count = link->cur_link_settings.lane_count; + dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link); + } return true; + } } fail_count++; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 851bd17317a0..0b18aa35c33c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -55,7 +55,7 @@ void dp_set_hw_test_pattern( uint8_t *custom_pattern, uint32_t custom_pattern_size); -void dpcd_set_training_pattern( +enum dc_status dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern); @@ -182,4 +182,18 @@ uint32_t dp_translate_training_aux_read_interval( uint8_t dp_get_nibble_at_index(const uint8_t *buf, uint32_t index); + +bool dp_check_interlane_aligned(union lane_align_status_updated align_status, + struct dc_link *link, + uint8_t retries); + +uint32_t dp_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t offset, + uint8_t retries); + +bool dp_check_dpcd_reqeust_status(const struct dc_link *link, + enum dc_status status); + #endif /* __DC_LINK_DP_TRAINING_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 2b4c15b0b407..3bdce32a85e3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -157,6 +157,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( struct link_training_settings *lt_settings, uint32_t offset) { + enum dc_status status; uint32_t retries_cr; uint32_t retry_count; uint32_t wait_time_microsec; @@ -216,7 +217,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( /* 4. Read lane status and requested drive * settings as set by the sink */ - dp_get_lane_status_and_lane_adjust( + status = dp_get_lane_status_and_lane_adjust( link, lt_settings, dpcd_lane_status, @@ -224,6 +225,9 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( dpcd_lane_adjust, offset); + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; + /* 5. check CR done*/ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); @@ -273,6 +277,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( struct link_training_settings *lt_settings, uint32_t offset) { + enum dc_status status; enum dc_dp_training_pattern tr_pattern; uint32_t retries_ch_eq; uint32_t wait_time_microsec; @@ -308,12 +313,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( dpcd_set_lane_settings(link, lt_settings, offset); /* 3. wait for receiver to lock-on*/ - wait_time_microsec = lt_settings->eq_pattern_time; - - if (is_repeater(lt_settings, offset)) - wait_time_microsec = - dp_translate_training_aux_read_interval( - link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + wait_time_microsec = dp_get_eq_aux_rd_interval(link, lt_settings, offset, retries_ch_eq); dp_wait_for_training_aux_rd_interval( link, @@ -322,7 +322,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( /* 4. Read lane status and requested * drive settings as set by the sink*/ - dp_get_lane_status_and_lane_adjust( + status = dp_get_lane_status_and_lane_adjust( link, lt_settings, dpcd_lane_status, @@ -330,6 +330,9 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( dpcd_lane_adjust, offset); + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; + /* 5. check CR done*/ if (!dp_is_cr_done(lane_count, dpcd_lane_status)) return dpcd_lane_status[0].bits.CR_DONE_0 ? @@ -339,7 +342,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( /* 6. check CHEQ done*/ if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && dp_is_symbol_locked(lane_count, dpcd_lane_status) && - dp_is_interlane_aligned(dpcd_lane_status_updated)) + dp_check_interlane_aligned(dpcd_lane_status_updated, link, retries_ch_eq)) return LINK_TRAINING_SUCCESS; /* 7. update VS/PE/PC2 in lt_settings*/ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index cd1975c03f38..39e4b7dc9588 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -43,9 +43,6 @@ #define DC_LOGGER \ link->ctx->logger -/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */ -#define DPIA_CLK_SYNC_DELAY 16000 - /* Extend interval between training status checks for manual testing. */ #define DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US 60000000 @@ -566,28 +563,6 @@ static enum link_training_result dpia_training_cr_phase( return result; } -/* Return status read interval during equalization phase. */ -static uint32_t dpia_get_eq_aux_rd_interval( - const struct dc_link *link, - const struct link_training_settings *lt_settings, - uint32_t hop) -{ - uint32_t wait_time_microsec; - - if (hop == DPRX) - wait_time_microsec = lt_settings->eq_pattern_time; - else - wait_time_microsec = - dp_translate_training_aux_read_interval( - link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]); - - /* Check debug option for extending aux read interval. */ - if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval) - wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US; - - return wait_time_microsec; -} - /* Execute equalization phase of link training for specified hop in display * path in non-transparent mode: * - driver issues both DPCD and SET_CONFIG transactions. @@ -936,6 +911,22 @@ static enum link_training_result dpia_training_end( return result; } +/* Return status read interval during equalization phase. */ +uint32_t dpia_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t hop) +{ + /* Check debug option for extending aux read interval. */ + if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval) + return DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US; + else if (hop == DPRX) + return lt_settings->eq_pattern_time; + else + return dp_translate_training_aux_read_interval( + link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]); +} + /* When aborting training of specified hop in display path, clean up by: * - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET. * - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0. @@ -943,7 +934,7 @@ static enum link_training_result dpia_training_end( * @param link DPIA link being trained. * @param hop Hop in display path. DPRX = 0. */ -static void dpia_training_abort( +void dpia_training_abort( struct dc_link *link, struct link_training_settings *lt_settings, uint32_t hop) @@ -968,7 +959,26 @@ static void dpia_training_abort( core_link_write_dpcd(link, dpcd_tps_offset, &data, 1); core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1); core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1); - core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); + + if (!link->dc->config.consolidated_dpia_dp_lt) + core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); +} + +void dpia_set_tps_notification( + struct dc_link *link, + const struct link_training_settings *lt_settings, + uint8_t pattern, + uint32_t hop) +{ + uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */ + + if (lt_settings->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT || pattern == DPCD_TRAINING_PATTERN_VIDEOIDLE) + return; + + repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + + if (hop != repeater_cnt) + dc_process_dmub_dpia_set_tps_notification(link->ctx->dc, link->link_index, pattern); } enum link_training_result dpia_perform_link_training( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h index b39fb9faf1c2..9f4eceb494c2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h @@ -28,6 +28,9 @@ #define __DC_LINK_DP_TRAINING_DPIA_H__ #include "link_dp_training.h" +/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */ +#define DPIA_CLK_SYNC_DELAY 16000 + /* Train DP tunneling link for USB4 DPIA display endpoint. * DPIA equivalent of dc_link_dp_perfrorm_link_training. * Aborts link training upon detection of sink unplug. @@ -38,4 +41,20 @@ enum link_training_result dpia_perform_link_training( const struct dc_link_settings *link_setting, bool skip_video_pattern); +void dpia_training_abort( + struct dc_link *link, + struct link_training_settings *lt_settings, + uint32_t hop); + +uint32_t dpia_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t hop); + +void dpia_set_tps_notification( + struct dc_link *link, + const struct link_training_settings *lt_settings, + uint8_t pattern, + uint32_t offset); + #endif /* __DC_LINK_DP_TRAINING_DPIA_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 46ad684fe192..893a9d9ee870 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -2155,6 +2155,7 @@ static bool dcn35_resource_construct( dc->dml2_options.max_segments_per_hubp = 24; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + dc->dml2_options.override_det_buffer_size_kbytes = true; if (dc->config.sdpif_request_limit_words_per_umc == 0) dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 4c5e722baa3a..70abd32ce2ad 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = { .hdmichar = true, .dpstream = true, .symclk32_se = true, - .symclk32_le = true, + .symclk32_le = false, .symclk_fe = true, .physymclk = false, .dpiasymclk = true, @@ -766,6 +766,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .notify_dpia_hr_bw = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { @@ -2133,6 +2134,7 @@ static bool dcn351_resource_construct( dc->dml2_options.max_segments_per_hubp = 24; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + dc->dml2_options.override_det_buffer_size_kbytes = true; if (dc->config.sdpif_request_limit_words_per_umc == 0) dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 15f7eda903e6..014e8a296f0c 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -813,6 +813,14 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) return skip_easf; } +/* Check if video is in fullscreen mode */ +static bool spl_is_video_fullscreen(struct spl_in *spl_in) +{ + if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) + return true; + return false; +} + static bool spl_get_isharp_en(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { @@ -820,6 +828,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, int vratio = 0; int hratio = 0; struct spl_taps taps = spl_scratch->scl_data.taps; + bool fullscreen = spl_is_video_fullscreen(spl_in); /* Return if adaptive sharpness is disabled */ if (spl_in->adaptive_sharpness.enable == false) @@ -835,9 +844,18 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, // Scaling is up to 1:1 (no scaling) or upscaling /* - * Apply sharpness to all RGB surfaces and to - * NV12/P010 surfaces + * Apply sharpness to RGB and YUV (NV12/P010) + * surfaces based on policy setting */ + if (!spl_is_yuv420(spl_in->basic_in.format) && + (spl_in->debug.sharpen_policy == SHARPEN_YUV)) + return enable_isharp; + else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && + (spl_in->debug.sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) + return enable_isharp; + else if (!spl_in->is_fullscreen && + spl_in->debug.sharpen_policy == SHARPEN_FULLSCREEN_ALL) + return enable_isharp; /* * Apply sharpness if supports horizontal taps 4,6 AND @@ -1155,14 +1173,19 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *sp } /* Calculate C0-C3 coefficients based on HDR_mult */ -static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t hdr_multx100) +static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits) { struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult; struct spl_fixed31_32 c0_calc, c1_calc, c2_calc; struct spl_custom_float_format fmt; + uint32_t hdr_multx100_int; - SPL_ASSERT(hdr_multx100); - hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100, 100LL); + if ((sdr_white_level_nits >= 80) && (sdr_white_level_nits <= 480)) + hdr_multx100_int = sdr_white_level_nits * 100 / 80; + else + hdr_multx100_int = 100; /* default for 80 nits otherwise */ + + hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL); c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL); c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL); c2_mult = spl_fixpt_from_fraction(722LL, 10000LL); @@ -1191,7 +1214,7 @@ static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint3 static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, enum spl_pixel_format format, enum system_setup setup, - uint32_t hdr_multx100) + uint32_t sdr_white_level_nits) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; if (enable_easf_v) { @@ -1499,7 +1522,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s dscl_prog_data->easf_ltonl_en = 1; // Linear input if ((setup == HDR_L) && (spl_is_rgb8(format))) { /* Calculate C0-C3 coefficients based on HDR multiplier */ - spl_calculate_c0_c3_hdr(dscl_prog_data, hdr_multx100); + spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits); } else { // HDR_L ( DWM ) and SDR_L dscl_prog_data->easf_matrix_c0 = 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) @@ -1557,7 +1580,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, - enum system_setup setup) + enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1565,6 +1588,11 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, return; } + spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness, + scale_to_sharpness_policy); + dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); + dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; + dscl_prog_data->isharp_en = 1; // ISHARP_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap if (data->taps.h_taps == 6) { @@ -1662,11 +1690,6 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format } - - spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness); - dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); - dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; - // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1750,7 +1773,7 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) // Set EASF spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format, setup, spl_in->hdr_multx100); + spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits); // Set iSHARP vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); @@ -1761,7 +1784,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup, + spl_in->debug.scale_to_sharpness_policy); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index 33712f50d303..e0572252c640 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -500,6 +500,15 @@ struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] = }, }; +struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = { + {1125, 1000, 0}, + {11, 10, 1}, + {1075, 1000, 2}, + {105, 100, 3}, + {1025, 1000, 4}, + {1, 1, 5}, +}; + const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -541,19 +550,72 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) return filter_isharp_bs_3tap_64p_s1_12; } -static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, enum system_setup setup, - struct spl_sharpness_range sharpness_range) +static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio) +{ + int j; + struct spl_fixed31_32 ratio_level; + struct scale_ratio_to_sharpness_level_adj *lookup_ptr; + unsigned int sharpness_level_down_adj; + + /* + * Adjust sharpness level based on current scaling ratio + * + * We have 5 discrete scaling ratios which we will use to adjust the + * sharpness level down by 1 as we pass each ratio. The ratios + * are + * + * 1.125 upscale and higher - no adj + * 1.100 - under 1.125 - adj level down 1 + * 1.075 - under 1.100 - adj level down 2 + * 1.050 - under 1.075 - adj level down 3 + * 1.025 - under 1.050 - adj level down 4 + * 1.000 - under 1.025 - adj level down 5 + * + */ + j = 0; + sharpness_level_down_adj = 0; + lookup_ptr = sharpness_level_adj; + while (j < NUM_SHARPNESS_ADJ_LEVELS) { + ratio_level = spl_fixpt_from_fraction(lookup_ptr->ratio_numer, + lookup_ptr->ratio_denom); + if (ratio.value >= ratio_level.value) { + sharpness_level_down_adj = lookup_ptr->level_down_adj; + break; + } + lookup_ptr++; + j++; + } + return sharpness_level_down_adj; +} + +static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio, + int discrete_sharpness_level, enum system_setup setup, + struct spl_sharpness_range sharpness_range, + enum scale_to_sharpness_policy scale_to_sharpness_policy) { unsigned int sharpness_level = 0; + unsigned int sharpness_level_down_adj = 0; int min_sharpness, max_sharpness, mid_sharpness; + /* + * Adjust sharpness level if policy requires we adjust it based on + * scale ratio. Based on scale ratio, we may adjust the sharpness + * level down by a certain number of steps. We will not select + * a sharpness value of 0 so the lowest sharpness level will be + * 0 or 1 depending on what the min_sharpness is + * + * If the policy is no required, this code maybe removed at a later + * date + */ switch (setup) { case HDR_L: min_sharpness = sharpness_range.hdr_rgb_min; max_sharpness = sharpness_range.hdr_rgb_max; mid_sharpness = sharpness_range.hdr_rgb_mid; + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); break; case HDR_NL: /* currently no use case, use Non-linear SDR values for now */ @@ -561,15 +623,26 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, min_sharpness = sharpness_range.sdr_yuv_min; max_sharpness = sharpness_range.sdr_yuv_max; mid_sharpness = sharpness_range.sdr_yuv_mid; + if (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); break; case SDR_L: default: min_sharpness = sharpness_range.sdr_rgb_min; max_sharpness = sharpness_range.sdr_rgb_max; mid_sharpness = sharpness_range.sdr_rgb_mid; + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); break; } + if ((min_sharpness == 0) && (sharpness_level_down_adj >= discrete_sharpness_level)) + discrete_sharpness_level = 1; + else if (sharpness_level_down_adj >= discrete_sharpness_level) + discrete_sharpness_level = 0; + else + discrete_sharpness_level -= sharpness_level_down_adj; + int lower_half_step_size = (mid_sharpness - min_sharpness) / 5; int upper_half_step_size = (max_sharpness - mid_sharpness) / 5; @@ -584,7 +657,7 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, } void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, - struct adaptive_sharpness sharpness) + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy) { uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level; @@ -594,8 +667,9 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE]; /* Custom sharpnessX1000 value */ - unsigned int sharpnessX1000 = spl_calculate_sharpness_level(sharpness.sharpness_level, - setup, sharpness.sharpness_range); + unsigned int sharpnessX1000 = spl_calculate_sharpness_level(ratio, + sharpness.sharpness_level, setup, + sharpness.sharpness_range, scale_to_sharpness_policy); sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000); /* @@ -606,7 +680,6 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en (filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000)) return; - /* * Calculate LUT_128_gained with this equation: * diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index fe0b12571f2c..afcc66206ca2 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -20,11 +20,11 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); -struct scale_ratio_to_sharpness_level_lookup { +#define NUM_SHARPNESS_ADJ_LEVELS 6 +struct scale_ratio_to_sharpness_level_adj { unsigned int ratio_numer; unsigned int ratio_denom; - unsigned int sharpness_numer; - unsigned int sharpness_denom; + unsigned int level_down_adj; /* adjust sharpness level down */ }; struct isharp_1D_lut_pregen { @@ -45,6 +45,7 @@ void spl_init_blur_scale_coeffs(void); void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data); -void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, struct adaptive_sharpness sharpness); +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy); uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 85b19ebe2c57..2a74ff5fdfdb 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -487,6 +487,17 @@ enum linear_light_scaling { // convert it in translation logic LLS_PREF_YES, LLS_PREF_NO }; +enum sharpen_policy { + SHARPEN_ALWAYS = 0, + SHARPEN_YUV = 1, + SHARPEN_RGB_FULLSCREEN_YUV = 2, + SHARPEN_FULLSCREEN_ALL = 3 +}; +enum scale_to_sharpness_policy { + NO_SCALE_TO_SHARPNESS_ADJ = 0, + SCALE_TO_SHARPNESS_ADJ_YUV = 1, + SCALE_TO_SHARPNESS_ADJ_ALL = 2 +}; struct spl_funcs { void (*spl_calc_lb_num_partitions) (bool alpha_en, @@ -499,6 +510,8 @@ struct spl_funcs { struct spl_debug { int visual_confirm_base_offset; int visual_confirm_dpp_offset; + enum sharpen_policy sharpen_policy; + enum scale_to_sharpness_policy scale_to_sharpness_policy; }; struct spl_in { @@ -518,7 +531,7 @@ struct spl_in { bool is_hdr_on; int h_active; int v_active; - int hdr_multx100; + int sdr_white_level_nits; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index cd70453aeae0..fe5b6f7a3eb1 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -300,6 +300,7 @@ struct dmub_srv_hw_params { enum dmub_ips_disable_type disable_ips; bool disallow_phy_access; bool disable_sldo_opt; + bool enable_non_transparent_setconfig; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index e20c220aa8b4..ebcf68bfae2b 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -682,7 +682,7 @@ union dmub_fw_boot_options { uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/ uint32_t usb4_cm_version: 1; /**< 1 CM support */ uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */ - uint32_t reserved0: 1; + uint32_t enable_non_transparent_setconfig: 1; /* 1 if dpia use conventional dp lt flow*/ uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ @@ -1308,6 +1308,7 @@ enum dmub_cmd_dpia_type { DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0, DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1, DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2, + DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3, }; /* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */ @@ -2139,6 +2140,24 @@ struct dmub_rb_cmd_set_mst_alloc_slots { }; /** + * Data passed from driver to FW in a DMUB_CMD__SET_TPS_NOTIFICATION command. + */ +struct dmub_cmd_tps_notification_data { + uint8_t instance; /* DPIA instance */ + uint8_t tps; /* requested training pattern */ + uint8_t reserved1; + uint8_t reserved2; +}; + +/** + * DMUB command structure for SET_TPS_NOTIFICATION command. + */ +struct dmub_rb_cmd_set_tps_notification { + struct dmub_cmd_header header; /* header */ + struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */ +}; + +/** * DMUB command structure for DPIA HPD int enable control. */ struct dmub_rb_cmd_dpia_hpd_int_enable { @@ -5305,6 +5324,10 @@ union dmub_rb_cmd { */ struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots; /** + * Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command. + */ + struct dmub_rb_cmd_set_tps_notification set_tps_notification; + /** * Definition of a DMUB_CMD__EDID_CEA command. */ struct dmub_rb_cmd_edid_cea edid_cea; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 746696b6f09a..2ccad79053c5 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -425,6 +425,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.ips_disable = params->disable_ips; boot_options.bits.ips_sequential_ono = params->ips_sequential_ono; boot_options.bits.disable_sldo_opt = params->disable_sldo_opt; + boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig; REG_WRITE(DMCUB_SCRATCH14, boot_options.all); } diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index a40e6590215a..bbd259cea4f4 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -134,7 +134,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), - stream->timing.h_total), 1000000); + stream->timing.h_total) + 500000, 1000000); /* v_total cannot be less than nominal */ if (v_total < stream->timing.v_total) { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 745fd052840d..3f91926a50e9 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -85,7 +85,7 @@ enum amd_apu_flags { * @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler * @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine * @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine -* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Schduler for Multimedia +* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia * @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor * @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types */ diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 09cbc3afd6d8..b0fc22383e28 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4 uint16_t supporteddevices; uint8_t number_of_path; uint8_t reserved; - struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path + struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path }; struct display_object_info_table_v1_5 { @@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 { uint8_t reserved; // the real number of this included in the structure is calculated by using the // (whole structure size - the header size- number_of_path)/size of atom_display_object_path - struct atom_display_object_path_v3 display_path[8]; + struct atom_display_object_path_v3 display_path[]; }; /* diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 7744ca3ef4b1..e3e635a31b8a 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -71,6 +71,11 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; +struct kfd_cu_occupancy { + u32 wave_cnt; + u32 doorbell_off; +}; + /** * enum kfd_sched_policy * @@ -313,8 +318,9 @@ struct kfd2kgd_calls { uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); - void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, - int *wave_cnt, int *max_waves_per_cu, uint32_t inst); + void (*get_cu_occupancy)(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 0b3c2f54a343..822c6425d90e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -123,7 +123,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0xC +#define SMU_METRICS_TABLE_VERSION 0xD typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -227,6 +227,10 @@ typedef struct __attribute__((packed, aligned(4))) { // PCIE LINK Speed and width uint32_t PCIeLinkSpeed; uint32_t PCIeLinkWidth; + + // PER XCD ACTIVITY + uint32_t GfxBusy[8]; + uint64_t GfxBusyAcc[8]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index ac0dd6b97f8d..e71a721c12b9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -439,7 +439,16 @@ enum smu_clk_type { __SMU_DUMMY_MAP(BACO_CG), \ __SMU_DUMMY_MAP(SOC_CG), \ __SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \ - __SMU_DUMMY_MAP(WHISPER_MODE), + __SMU_DUMMY_MAP(WHISPER_MODE), \ + __SMU_DUMMY_MAP(EDC_PWRBRK), \ + __SMU_DUMMY_MAP(SOC_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_PSM_DIDT), \ + __SMU_DUMMY_MAP(APT_ALL_ENABLE), \ + __SMU_DUMMY_MAP(APT_SQ_THROTTLE), \ + __SMU_DUMMY_MAP(APT_PF_DCS), \ + __SMU_DUMMY_MAP(GFX_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \ + __SMU_DUMMY_MAP(FAN_ABNORMAL), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a887ab945dfa..1d024b122b0c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2569,10 +2569,14 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - return smu_cmn_send_smc_msg_with_param(smu, + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, workload_mask, NULL); + if (!ret) + smu->workload_mask = workload_mask; + + return ret; } static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 9974c9f8135e..55ed6247eb61 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2107,8 +2107,12 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap, } mutex_lock(&adev->pm.mutex); r = smu_v13_0_6_request_i2c_xfer(smu, req); - if (r) - goto fail; + if (r) { + /* Retry once, in case of an i2c collision */ + r = smu_v13_0_6_request_i2c_xfer(smu, req); + if (r) + goto fail; + } for (c = i = 0; i < num_msgs; i++) { if (!(msg[i].flags & I2C_M_RD)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 7bc95c404377..b891a5e0a396 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2501,8 +2501,11 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp return -EINVAL; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, 1 << workload_type, NULL); + if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu->workload_mask = (1 << workload_type); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index a31fae5feedf..5899d01fa73d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -199,6 +199,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + FEA_MAP(EDC_PWRBRK), + FEA_MAP(SOC_EDC_XVMIN), + FEA_MAP(GFX_PSM_DIDT), + FEA_MAP(APT_ALL_ENABLE), + FEA_MAP(APT_SQ_THROTTLE), + FEA_MAP(APT_PF_DCS), + FEA_MAP(GFX_EDC_XVMIN), + FEA_MAP(GFX_DIDT_XVMIN), + FEA_MAP(FAN_ABNORMAL), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, @@ -687,6 +696,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu) pcie_table->clk_freq[pcie_table->num_of_link_levels] = skutable->LclkFreq[link_level]; pcie_table->num_of_link_levels++; + + if (link_level == 0) + link_level++; } /* dcefclk dpm table setup */ @@ -1849,10 +1861,14 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - return smu_cmn_send_smc_msg_with_param(smu, + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, 1 << workload_type, NULL); + if (!ret) + smu->workload_mask = 1 << workload_type; + + return ret; } static int smu_v14_0_2_baco_enter(struct smu_context *smu) diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 63b7ef02513d..1475e1483110 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -287,9 +287,9 @@ static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) return ret; - regs = pcim_iomap(pdev, 1, 0); - if (!regs) - return -EIO; + regs = pcim_iomap_region(pdev, 1, "ast"); + if (IS_ERR(regs)) + return PTR_ERR(regs); if (pdev->revision >= 0x40) { /* @@ -311,9 +311,9 @@ static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (len < AST_IO_MM_LENGTH) return -EIO; - ioregs = pcim_iomap(pdev, 2, 0); - if (!ioregs) - return -EIO; + ioregs = pcim_iomap_region(pdev, 2, "ast"); + if (IS_ERR(ioregs)) + return PTR_ERR(ioregs); } else { /* * Anything else is best effort. diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index dee640ab1d3a..41f72d458487 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -47,7 +47,7 @@ #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "cdns-mhdp8546-core.h" #include "cdns-mhdp8546-hdcp.h" diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c index 5e3b8edcf794..31832ba4017f 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c @@ -9,7 +9,7 @@ #include <linux/io.h> #include <linux/iopoll.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/display/drm_hdcp_helper.h> diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 73ccf21ae446..4416d0be7272 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -10,7 +10,7 @@ * Tomasz Figa <t.figa@samsung.com> */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/clk.h> #include <linux/delay.h> diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 6bb755e9f0a5..26b8d137bce0 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -6,7 +6,7 @@ * Andrzej Hajda <a.hajda@samsung.com> */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/bridge/mhl.h> #include <drm/drm_bridge.h> diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c index 67b8d17a722a..221e9a4edb40 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c @@ -8,6 +8,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/vmalloc.h> #include <drm/bridge/dw_hdmi.h> #include <drm/drm_edid.h> @@ -388,15 +389,36 @@ static int dw_hdmi_close(struct snd_pcm_substream *substream) static int dw_hdmi_hw_free(struct snd_pcm_substream *substream) { - return snd_pcm_lib_free_vmalloc_buffer(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + + vfree(runtime->dma_area); + runtime->dma_area = NULL; + return 0; } static int dw_hdmi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { + struct snd_pcm_runtime *runtime = substream->runtime; + size_t size = params_buffer_bytes(params); + /* Allocate the PCM runtime buffer, which is exposed to userspace. */ - return snd_pcm_lib_alloc_vmalloc_buffer(substream, - params_buffer_bytes(params)); + if (runtime->dma_area) { + if (runtime->dma_bytes >= size) + return 0; /* already large enough */ + vfree(runtime->dma_area); + } + runtime->dma_area = vzalloc(size); + if (!runtime->dma_area) + return -ENOMEM; + runtime->dma_bytes = size; + return 1; +} + +static struct page *dw_hdmi_get_page(struct snd_pcm_substream *substream, + unsigned long offset) +{ + return vmalloc_to_page(substream->runtime->dma_area + offset); } static int dw_hdmi_prepare(struct snd_pcm_substream *substream) @@ -515,7 +537,7 @@ static const struct snd_pcm_ops snd_dw_hdmi_ops = { .prepare = dw_hdmi_prepare, .trigger = dw_hdmi_trigger, .pointer = dw_hdmi_pointer, - .page = snd_pcm_lib_get_vmalloc_page, + .page = dw_hdmi_get_page, }; static int snd_dw_hdmi_probe(struct platform_device *pdev) diff --git a/drivers/gpu/drm/bridge/tc358775.c b/drivers/gpu/drm/bridge/tc358775.c index 3b7cc3be2ccd..0b4efaca6d68 100644 --- a/drivers/gpu/drm/bridge/tc358775.c +++ b/drivers/gpu/drm/bridge/tc358775.c @@ -19,7 +19,7 @@ #include <linux/regulator/consumer.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/display/drm_dp_helper.h> #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 0fd4cb400e81..9e31f750fd88 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -21,7 +21,7 @@ #include <linux/regmap.h> #include <linux/regulator/consumer.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/display/drm_dp_aux_bus.h> #include <drm/display/drm_dp_helper.h> diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index a040d7dfced1..ac90118b9e7a 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -6083,6 +6083,7 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) struct drm_dp_aux *immediate_upstream_aux; struct drm_dp_mst_port *fec_port; struct drm_dp_desc desc = {}; + u8 upstream_dsc; u8 endpoint_fec; u8 endpoint_dsc; @@ -6109,8 +6110,6 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) /* DP-to-DP peer device */ if (drm_dp_mst_is_virtual_dpcd(immediate_upstream_port)) { - u8 upstream_dsc; - if (drm_dp_dpcd_read(&port->aux, DP_DSC_SUPPORT, &endpoint_dsc, 1) != 1) return NULL; @@ -6156,6 +6155,13 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD)) { u8 dpcd_ext[DP_RECEIVER_CAP_SIZE]; + if (drm_dp_dpcd_read(immediate_upstream_aux, + DP_DSC_SUPPORT, &upstream_dsc, 1) != 1) + return NULL; + + if (!(upstream_dsc & DP_DSC_DECOMPRESSION_IS_SUPPORTED)) + return NULL; + if (drm_dp_read_dpcd_caps(immediate_upstream_aux, dpcd_ext) < 0) return NULL; diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 7854820089ec..feb7a3a75981 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -521,8 +521,6 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_check); -#define HDMI_MAX_INFOFRAME_SIZE 29 - static int clear_device_infoframe(struct drm_connector *connector, enum hdmi_infoframe_type type) { @@ -563,7 +561,7 @@ static int write_device_infoframe(struct drm_connector *connector, { const struct drm_connector_hdmi_funcs *funcs = connector->hdmi.funcs; struct drm_device *dev = connector->dev; - u8 buffer[HDMI_MAX_INFOFRAME_SIZE]; + u8 buffer[HDMI_INFOFRAME_SIZE(MAX)]; int ret; int len; diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7936c2023955..370dc676e3aa 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -543,7 +543,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, &state->fb_damage_clips, val, -1, - sizeof(struct drm_rect), + sizeof(struct drm_mode_rect), &replaced); return ret; } else if (property == plane->scaling_filter_property) { diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 6b239a24f1df..9d3e6dd68810 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -520,8 +520,6 @@ static const struct file_operations drm_connector_fops = { .write = connector_write }; -#define HDMI_MAX_INFOFRAME_SIZE 29 - static ssize_t audio_infoframe_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -579,7 +577,7 @@ static ssize_t _f##_read_infoframe(struct file *filp, \ struct drm_connector *connector; \ union hdmi_infoframe *frame; \ struct drm_device *dev; \ - u8 buf[HDMI_MAX_INFOFRAME_SIZE]; \ + u8 buf[HDMI_INFOFRAME_SIZE(MAX)]; \ ssize_t len = 0; \ \ connector = filp->private_data; \ diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index 2da094bdf8a4..18e366cc4993 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -145,8 +145,7 @@ static int drm_exec_obj_locked(struct drm_exec *exec, size_t size = exec->max_objects * sizeof(void *); void *tmp; - tmp = kvrealloc(exec->objects, size, size + PAGE_SIZE, - GFP_KERNEL); + tmp = kvrealloc(exec->objects, size + PAGE_SIZE, GFP_KERNEL); if (!tmp) return -ENOMEM; diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 01fde94fe2a9..ad1dc638c83b 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -103,7 +103,6 @@ bool drm_dev_needs_global_mutex(struct drm_device *dev) * .compat_ioctl = drm_compat_ioctl, // NULL if CONFIG_COMPAT=n * .poll = drm_poll, * .read = drm_read, - * .llseek = no_llseek, * .mmap = drm_gem_mmap, * }; * @@ -310,6 +309,8 @@ int drm_open_helper(struct file *filp, struct drm_minor *minor) if (dev->switch_power_state != DRM_SWITCH_POWER_ON && dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF) return -EINVAL; + if (WARN_ON_ONCE(!(filp->f_op->fop_flags & FOP_UNSIGNED_OFFSET))) + return -EINVAL; drm_dbg_core(dev, "comm=\"%s\", pid=%d, minor=%d\n", current->comm, task_pid_nr(current), minor->index); @@ -327,7 +328,6 @@ int drm_open_helper(struct file *filp, struct drm_minor *minor) } filp->private_data = priv; - filp->f_mode |= FMODE_UNSIGNED_OFFSET; priv->filp = filp; mutex_lock(&dev->filelist_mutex); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 03bd3c7bd0dc..0e3f8adf162f 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -410,22 +410,30 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, } /** - * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers + * drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC - * @prime_fd: pointer to storage for the fd id of the create dma-buf * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. + * + * Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it + * has created, without attaching it to any file descriptors. The difference + * between those two is similar to that between anon_inode_getfile() and + * anon_inode_getfd(); insertion into descriptor table is something you + * can not revert if any cleanup is needed, so the descriptor-returning + * variants should only be used when you are past the last failure exit + * and the only thing left is passing the new file descriptor to userland. + * When all you need is the object itself or when you need to do something + * else that might fail, use that one instead. */ -int drm_gem_prime_handle_to_fd(struct drm_device *dev, +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, - uint32_t flags, - int *prime_fd) + uint32_t flags) { struct drm_gem_object *obj; int ret = 0; @@ -434,14 +442,14 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, mutex_lock(&file_priv->prime.lock); obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { - ret = -ENOENT; + dmabuf = ERR_PTR(-ENOENT); goto out_unlock; } dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle); if (dmabuf) { get_dma_buf(dmabuf); - goto out_have_handle; + goto out; } mutex_lock(&dev->object_name_lock); @@ -463,7 +471,6 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ - ret = PTR_ERR(dmabuf); mutex_unlock(&dev->object_name_lock); goto out; } @@ -478,34 +485,51 @@ out_have_obj: ret = drm_prime_add_buf_handle(&file_priv->prime, dmabuf, handle); mutex_unlock(&dev->object_name_lock); - if (ret) - goto fail_put_dmabuf; - -out_have_handle: - ret = dma_buf_fd(dmabuf, flags); - /* - * We must _not_ remove the buffer from the handle cache since the newly - * created dma buf is already linked in the global obj->dma_buf pointer, - * and that is invariant as long as a userspace gem handle exists. - * Closing the handle will clean out the cache anyway, so we don't leak. - */ - if (ret < 0) { - goto fail_put_dmabuf; - } else { - *prime_fd = ret; - ret = 0; + if (ret) { + dma_buf_put(dmabuf); + dmabuf = ERR_PTR(ret); } - - goto out; - -fail_put_dmabuf: - dma_buf_put(dmabuf); out: drm_gem_object_put(obj); out_unlock: mutex_unlock(&file_priv->prime.lock); + return dmabuf; +} +EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf); - return ret; +/** + * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers + * @dev: dev to export the buffer from + * @file_priv: drm file-private structure + * @handle: buffer handle to export + * @flags: flags like DRM_CLOEXEC + * @prime_fd: pointer to storage for the fd id of the create dma-buf + * + * This is the PRIME export function which must be used mandatorily by GEM + * drivers to ensure correct lifetime management of the underlying GEM object. + * The actual exporting from GEM object to a dma-buf is done through the + * &drm_gem_object_funcs.export callback. + */ +int drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd) +{ + struct dma_buf *dmabuf; + int fd = get_unused_fd_flags(flags); + + if (fd < 0) + return fd; + + dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags); + if (IS_ERR(dmabuf)) { + put_unused_fd(fd); + return PTR_ERR(dmabuf); + } + + fd_install(fd, dmabuf->file); + *prime_fd = fd; + return 0; } EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index a0e94217b511..8e3d2d7060f8 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -715,16 +715,16 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, struct fd f = fdget(fd); int ret; - if (!f.file) + if (!fd_file(f)) return -EINVAL; - if (f.file->f_op != &drm_syncobj_file_fops) { + if (fd_file(f)->f_op != &drm_syncobj_file_fops) { fdput(f); return -EINVAL; } /* take a reference to put in the idr */ - syncobj = f.file->private_data; + syncobj = fd_file(f)->private_data; drm_syncobj_get(syncobj); idr_preload(GFP_KERNEL); @@ -1464,6 +1464,7 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *syncobj; struct eventfd_ctx *ev_fd_ctx; struct syncobj_eventfd_entry *entry; + int ret; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) return -EOPNOTSUPP; @@ -1479,13 +1480,15 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, return -ENOENT; ev_fd_ctx = eventfd_ctx_fdget(args->fd); - if (IS_ERR(ev_fd_ctx)) - return PTR_ERR(ev_fd_ctx); + if (IS_ERR(ev_fd_ctx)) { + ret = PTR_ERR(ev_fd_ctx); + goto err_fdget; + } entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - eventfd_ctx_put(ev_fd_ctx); - return -ENOMEM; + ret = -ENOMEM; + goto err_kzalloc; } entry->syncobj = syncobj; entry->ev_fd_ctx = ev_fd_ctx; @@ -1496,6 +1499,12 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, drm_syncobj_put(syncobj); return 0; + +err_kzalloc: + eventfd_ctx_put(ev_fd_ctx); +err_fdget: + drm_syncobj_put(syncobj); + return ret; } int diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index e2c7373f20c6..6a6761935224 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -110,7 +110,7 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, void *mapping = NULL; if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) - mapping = arm_iommu_create_mapping(&platform_bus_type, + mapping = arm_iommu_create_mapping(dev, EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE); else if (IS_ENABLED(CONFIG_IOMMU_DMA)) mapping = iommu_get_domain_for_dev(priv->dma_dev); diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index 8b64f61ffaf9..d67c2b3ad901 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -498,6 +498,7 @@ static const struct file_operations psb_gem_fops = { .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, + .fop_flags = FOP_UNSIGNED_OFFSET, }; static const struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index d49435af62c7..bed485374ab0 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2948,7 +2948,7 @@ init_vbt_missing_defaults(struct intel_display *display) list_add_tail(&devdata->node, &display->vbt.display_devices); drm_dbg_kms(display->drm, - "Generating default VBT child device with type 0x04%x on port %c\n", + "Generating default VBT child device with type 0x%04x on port %c\n", child->device_type, port_name(port)); } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 00fbe9f8c03a..b1c294236cc8 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -916,7 +916,7 @@ intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port, * instead of a specific AUX_IO_<port> reference without powering up any * extra wells. */ - if (intel_encoder_can_psr(&dig_port->base)) + if (intel_psr_needs_aux_io_power(&dig_port->base, crtc_state)) return intel_display_power_aux_io_domain(i915, dig_port->aux_ch); else if (DISPLAY_VER(i915) < 14 && (intel_crtc_has_dp_encoder(crtc_state) || diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a1fcedfd404b..90fa73575feb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -531,6 +531,10 @@ static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { /* The values must be in increasing order */ + static const int bmg_rates[] = { + 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, + 810000, 1000000, 1350000, + }; static const int mtl_rates[] = { 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, 810000, 1000000, 2000000, @@ -561,8 +565,13 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) intel_dp->source_rates || intel_dp->num_source_rates); if (DISPLAY_VER(dev_priv) >= 14) { - source_rates = mtl_rates; - size = ARRAY_SIZE(mtl_rates); + if (IS_BATTLEMAGE(dev_priv)) { + source_rates = bmg_rates; + size = ARRAY_SIZE(bmg_rates); + } else { + source_rates = mtl_rates; + size = ARRAY_SIZE(mtl_rates); + } max_rate = mtl_max_source_rate(intel_dp); } else if (DISPLAY_VER(dev_priv) >= 11) { source_rates = icl_rates; @@ -4058,6 +4067,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector drm_dp_is_branch(intel_dp->dpcd)); intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); + intel_dp->colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + /* * Read the eDP display control registers. * @@ -4171,6 +4183,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); + intel_dp->colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + intel_dp_update_sink_caps(intel_dp); } @@ -6922,9 +6937,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, "HDCP init failed, skipping.\n"); } - intel_dp->colorimetry_support = - intel_dp_get_colorimetry_status(intel_dp); - intel_dp->frl.is_trained = false; intel_dp->frl.trained_rate_gbps = 0; diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index d8951464bd2b..f0e3be0fe420 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -32,7 +32,7 @@ #include <linux/slab.h> #include <linux/string_helpers.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1f83b3b67ea6..136a0d6ca970 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -203,6 +203,25 @@ bool intel_encoder_can_psr(struct intel_encoder *encoder) return false; } +bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + /* + * For PSR/PR modes only eDP requires the AUX IO power to be enabled whenever + * the output is enabled. For non-eDP outputs the main link is always + * on, hence it doesn't require the HW initiated AUX wake-up signaling used + * for eDP. + * + * TODO: + * - Consider leaving AUX IO disabled for eDP / PR as well, in case + * the ALPM with main-link off mode is not enabled. + * - Leave AUX IO enabled for DP / PR, once support for ALPM with + * main-link off mode is added for it and this mode gets enabled. + */ + return intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + intel_encoder_can_psr(encoder); +} + static bool psr_global_enabled(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); @@ -2784,13 +2803,6 @@ static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp) EDP_PSR_STATUS_STATE_MASK, 50); } -static int _panel_replay_ready_for_pipe_update_locked(struct intel_dp *intel_dp) -{ - return intel_dp_is_edp(intel_dp) ? - _psr2_ready_for_pipe_update_locked(intel_dp) : - _psr1_ready_for_pipe_update_locked(intel_dp); -} - /** * intel_psr_wait_for_idle_locked - wait for PSR be ready for a pipe update * @new_crtc_state: new CRTC state @@ -2813,12 +2825,10 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat lockdep_assert_held(&intel_dp->psr.lock); - if (!intel_dp->psr.enabled) + if (!intel_dp->psr.enabled || intel_dp->psr.panel_replay_enabled) continue; - if (intel_dp->psr.panel_replay_enabled) - ret = _panel_replay_ready_for_pipe_update_locked(intel_dp); - else if (intel_dp->psr.sel_update_enabled) + if (intel_dp->psr.sel_update_enabled) ret = _psr2_ready_for_pipe_update_locked(intel_dp); else ret = _psr1_ready_for_pipe_update_locked(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 4e09c10908e4..6eb5f15f674f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -25,6 +25,8 @@ struct intel_plane_state; (intel_dp)->psr.source_panel_replay_support) bool intel_encoder_can_psr(struct intel_encoder *encoder); +bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_enable_sink(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 9452cad41d07..17d4c880ecc4 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2453,6 +2453,9 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915, if (gen12_plane_has_mc_ccs(i915, plane_id)) caps |= INTEL_PLANE_CAP_CCS_MC; + if (DISPLAY_VER(i915) >= 14 && IS_DGFX(i915)) + caps |= INTEL_PLANE_CAP_NEED64K_PHYS; + return caps; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index c5e1c718a6d2..fe69f2c8527d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -424,7 +424,8 @@ shmem_pwrite(struct drm_i915_gem_object *obj, struct address_space *mapping = obj->base.filp->f_mapping; const struct address_space_operations *aops = mapping->a_ops; char __user *user_data = u64_to_user_ptr(arg->data_ptr); - u64 remain, offset; + u64 remain; + loff_t pos; unsigned int pg; /* Caller already validated user args */ @@ -457,12 +458,12 @@ shmem_pwrite(struct drm_i915_gem_object *obj, */ remain = arg->size; - offset = arg->offset; - pg = offset_in_page(offset); + pos = arg->offset; + pg = offset_in_page(pos); do { unsigned int len, unwritten; - struct page *page; + struct folio *folio; void *data, *vaddr; int err; char __maybe_unused c; @@ -480,21 +481,19 @@ shmem_pwrite(struct drm_i915_gem_object *obj, if (err) return err; - err = aops->write_begin(obj->base.filp, mapping, offset, len, - &page, &data); + err = aops->write_begin(obj->base.filp, mapping, pos, len, + &folio, &data); if (err < 0) return err; - vaddr = kmap_local_page(page); + vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos)); pagefault_disable(); - unwritten = __copy_from_user_inatomic(vaddr + pg, - user_data, - len); + unwritten = __copy_from_user_inatomic(vaddr, user_data, len); pagefault_enable(); kunmap_local(vaddr); - err = aops->write_end(obj->base.filp, mapping, offset, len, - len - unwritten, page, data); + err = aops->write_end(obj->base.filp, mapping, pos, len, + len - unwritten, folio, data); if (err < 0) return err; @@ -504,7 +503,7 @@ shmem_pwrite(struct drm_i915_gem_object *obj, remain -= len; user_data += len; - offset += len; + pos += len; pg = 0; } while (remain); @@ -660,7 +659,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, struct drm_i915_gem_object *obj; struct file *file; const struct address_space_operations *aops; - resource_size_t offset; + loff_t pos; int err; GEM_WARN_ON(IS_DGFX(i915)); @@ -672,29 +671,27 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, file = obj->base.filp; aops = file->f_mapping->a_ops; - offset = 0; + pos = 0; do { unsigned int len = min_t(typeof(size), size, PAGE_SIZE); - struct page *page; - void *pgdata, *vaddr; + struct folio *folio; + void *fsdata; - err = aops->write_begin(file, file->f_mapping, offset, len, - &page, &pgdata); + err = aops->write_begin(file, file->f_mapping, pos, len, + &folio, &fsdata); if (err < 0) goto fail; - vaddr = kmap(page); - memcpy(vaddr, data, len); - kunmap(page); + memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len); - err = aops->write_end(file, file->f_mapping, offset, len, len, - page, pgdata); + err = aops->write_end(file, file->f_mapping, pos, len, len, + folio, fsdata); if (err < 0) goto fail; size -= len; data += len; - offset += len; + pos += len; } while (size); return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5c72462d1f57..b22e2019768f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1131,7 +1131,7 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(bo->resource)); } - if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) + if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND != 0) intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index d4b918fb11ce..1f55e62044a4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -266,7 +266,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) args->timeout_ns = 0; - /* Asked to wait beyond the jiffie/scheduler precision? */ + /* Asked to wait beyond the jiffy/scheduler precision? */ if (ret == -ETIME && args->timeout_ns) ret = -EAGAIN; } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 4202df5b8c12..222ca7c44951 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -93,7 +93,7 @@ static int wait_for_reset(struct intel_engine_cs *engine, return -EINVAL; } - /* Give the request a jiffie to complete after flushing the worker */ + /* Give the request a jiffy to complete after flushing the worker */ if (i915_request_wait(rq, 0, max(0l, (long)(timeout - jiffies)) + 1) < 0) { pr_err("%s: hanging request %llx:%lld did not complete\n", @@ -3426,7 +3426,7 @@ static int live_preempt_timeout(void *arg) cpu_relax(); saved_timeout = engine->props.preempt_timeout_ms; - engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */ i915_request_get(rq); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 8322c913bc3c..ed979847187f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2843,9 +2843,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce, ce->parallel.guc.wqi_tail = 0; ce->parallel.guc.wqi_head = 0; - wq_desc_offset = i915_ggtt_offset(ce->state) + + wq_desc_offset = (u64)i915_ggtt_offset(ce->state) + __get_parent_scratch_offset(ce); - wq_base_offset = i915_ggtt_offset(ce->state) + + wq_base_offset = (u64)i915_ggtt_offset(ce->state) + __get_wq_offset(ce); info->wq_desc_lo = lower_32_bits(wq_desc_offset); info->wq_desc_hi = upper_32_bits(wq_desc_offset); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index fe905d65ddf7..a40f05b993da 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1675,6 +1675,7 @@ static const struct file_operations i915_driver_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, #endif + .fop_flags = FOP_UNSIGNED_OFFSET, }; static int diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 025a79fe5920..2406cda75b7b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3751,7 +3751,6 @@ static int i915_perf_release(struct inode *inode, struct file *file) static const struct file_operations fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .release = i915_perf_release, .poll = i915_perf_poll, .read = i915_perf_read, diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index b34a2d3d331d..2576f8f6c0f6 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -69,7 +69,7 @@ void set_timer_ms(struct timer_list *t, unsigned long timeout) * Paranoia to make sure the compiler computes the timeout before * loading 'jiffies' as jiffies is volatile and may be updated in * the background by a timer tick. All to reduce the complexity - * of the addition and reduce the risk of losing a jiffie. + * of the addition and reduce the risk of losing a jiffy. */ barrier(); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 77b50c56c124..3e807195a0d0 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -559,11 +559,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) * Configure the DMA segment size to make sure we get contiguous IOVA * when importing PRIME buffers. */ - ret = dma_set_max_seg_size(dma_dev, UINT_MAX); - if (ret) { - dev_err(dma_dev, "Failed to set DMA segment size\n"); - goto err_component_unbind; - } + dma_set_max_seg_size(dma_dev, UINT_MAX); ret = drm_vblank_init(drm, MAX_CRTC); if (ret < 0) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h index b26d5fe40c72..febc3e764a63 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h @@ -231,7 +231,7 @@ struct dpu_crtc_state { container_of(x, struct dpu_crtc_state, base) /** - * dpu_crtc_frame_pending - retun the number of pending frames + * dpu_crtc_frame_pending - return the number of pending frames * @crtc: Pointer to drm crtc object */ static inline int dpu_crtc_frame_pending(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 4494f6d1c7cb..7ab607252d18 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -357,12 +357,10 @@ void msm_debugfs_init(struct drm_minor *minor) if (priv->kms && priv->kms->funcs->debugfs_init) priv->kms->funcs->debugfs_init(priv->kms, minor); -#ifdef CONFIG_FAULT_INJECTION fault_create_debugfs_attr("fail_gem_alloc", minor->debugfs_root, &fail_gem_alloc); fault_create_debugfs_attr("fail_gem_iova", minor->debugfs_root, &fail_gem_iova); -#endif } #endif diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9c33f4e3f822..8c13b08708d2 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -7,6 +7,7 @@ #include <linux/dma-mapping.h> #include <linux/fault-inject.h> +#include <linux/debugfs.h> #include <linux/of_address.h> #include <linux/uaccess.h> @@ -58,10 +59,8 @@ static bool modeset = true; MODULE_PARM_DESC(modeset, "Use kernel modesetting [KMS] (1=on (default), 0=disable)"); module_param(modeset, bool, 0600); -#ifdef CONFIG_FAULT_INJECTION DECLARE_FAULT_ATTR(fail_gem_alloc); DECLARE_FAULT_ATTR(fail_gem_iova); -#endif static int msm_drm_uninit(struct device *dev) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 0d3adf398bc1..2e28a1344636 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -33,12 +33,8 @@ #include <drm/msm_drm.h> #include <drm/drm_gem.h> -#ifdef CONFIG_FAULT_INJECTION extern struct fault_attr fail_gem_alloc; extern struct fault_attr fail_gem_iova; -#else -# define should_fail(attr, size) 0 -#endif struct msm_kms; struct msm_gpu; diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c index 3d3da79fec2a..d3c7889aaf26 100644 --- a/drivers/gpu/drm/msm/msm_perf.c +++ b/drivers/gpu/drm/msm/msm_perf.c @@ -192,7 +192,6 @@ static const struct file_operations perf_debugfs_fops = { .owner = THIS_MODULE, .open = perf_open, .read = perf_read, - .llseek = no_llseek, .release = perf_release, }; diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index ca44fd291c5b..39138e190cb9 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -227,7 +227,6 @@ static const struct file_operations rd_debugfs_fops = { .owner = THIS_MODULE, .open = rd_open, .read = rd_read, - .llseek = no_llseek, .release = rd_release, }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/os.h b/drivers/gpu/drm/nouveau/include/nvif/os.h index a2eaf3929ac3..4a1123b81fee 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/os.h +++ b/drivers/gpu/drm/nouveau/include/nvif/os.h @@ -30,7 +30,7 @@ #include <linux/iommu.h> #include <linux/of_device.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <soc/tegra/fuse.h> #include <soc/tegra/pmc.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 4a9a9b9c3935..f6e78dba594f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1292,6 +1292,7 @@ nouveau_driver_fops = { .compat_ioctl = nouveau_compat_ioctl, #endif .llseek = noop_llseek, + .fop_flags = FOP_UNSIGNED_OFFSET, }; static struct drm_driver diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index 50f0c1914f58..4c3f74396579 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -46,6 +46,8 @@ u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *, u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, struct nvkm_device *, int, int *); +int gp100_ram_init(struct nvkm_ram *); + /* RAM type-specific MR calculation routines */ int nvkm_sddr2_calc(struct nvkm_ram *); int nvkm_sddr3_calc(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c index 378f6fb70990..8987a21e81d1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c @@ -27,7 +27,7 @@ #include <subdev/bios/init.h> #include <subdev/bios/rammap.h> -static int +int gp100_ram_init(struct nvkm_ram *ram) { struct nvkm_subdev *subdev = &ram->fb->subdev; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c index 8550f5e47347..b6b6ee59019d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c @@ -5,6 +5,7 @@ static const struct nvkm_ram_func gp102_ram = { + .init = gp100_ram_init, }; int diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 0caf9e9a8c45..cf1fde8b10e3 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1423,6 +1423,7 @@ static const struct file_operations panthor_drm_driver_fops = { .read = drm_read, .llseek = noop_llseek, .mmap = panthor_mmap, + .fop_flags = FOP_UNSIGNED_OFFSET, }; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 37f1885c54c7..aa12ed2acfcf 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1251,9 +1251,17 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, goto err_cleanup; } + /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our + * pre-allocated BO if the <BO,VM> association exists. Given we + * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will + * be called immediately, and we have to hold the VM resv lock when + * calling this function. + */ + dma_resv_lock(panthor_vm_resv(vm), NULL); mutex_lock(&bo->gpuva_list_lock); op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); mutex_unlock(&bo->gpuva_list_lock); + dma_resv_unlock(panthor_vm_resv(vm)); /* If the a vm_bo for this <VM,BO> combination exists, it already * retains a pin ref, and we can release the one we took earlier. diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 42afdf0ddb7e..21302d53d2bd 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1103,7 +1103,13 @@ cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs list_move_tail(&group->wait_node, &group->ptdev->scheduler->groups.waiting); } - group->blocked_queues |= BIT(cs_id); + + /* The queue is only blocked if there's no deferred operation + * pending, which can be checked through the scoreboard status. + */ + if (!cs_iface->output->status_scoreboards) + group->blocked_queues |= BIT(cs_id); + queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; queue->syncwait.ref = cs_iface->output->status_wait_sync_value; status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; @@ -2046,6 +2052,7 @@ static void tick_ctx_cleanup(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) { + struct panthor_device *ptdev = sched->ptdev; struct panthor_group *group, *tmp; u32 i; @@ -2054,7 +2061,7 @@ tick_ctx_cleanup(struct panthor_scheduler *sched, /* If everything went fine, we should only have groups * to be terminated in the old_groups lists. */ - drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask && + drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask && group_can_run(group)); if (!group_can_run(group)) { @@ -2077,7 +2084,7 @@ tick_ctx_cleanup(struct panthor_scheduler *sched, /* If everything went fine, the groups to schedule lists should * be empty. */ - drm_WARN_ON(&group->ptdev->base, + drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { @@ -3242,6 +3249,18 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) return 0; } +static struct panthor_group *group_from_handle(struct panthor_group_pool *pool, + u32 group_handle) +{ + struct panthor_group *group; + + xa_lock(&pool->xa); + group = group_get(xa_load(&pool->xa, group_handle)); + xa_unlock(&pool->xa); + + return group; +} + int panthor_group_get_state(struct panthor_file *pfile, struct drm_panthor_group_get_state *get_state) { @@ -3253,7 +3272,7 @@ int panthor_group_get_state(struct panthor_file *pfile, if (get_state->pad) return -EINVAL; - group = group_get(xa_load(&gpool->xa, get_state->group_handle)); + group = group_from_handle(gpool, get_state->group_handle); if (!group) return -EINVAL; @@ -3384,7 +3403,7 @@ panthor_job_create(struct panthor_file *pfile, job->call_info.latest_flush = qsubmit->latest_flush; INIT_LIST_HEAD(&job->node); - job->group = group_get(xa_load(&gpool->xa, group_handle)); + job->group = group_from_handle(gpool, group_handle); if (!job->group) { ret = -EINVAL; goto err_put_job; @@ -3424,13 +3443,8 @@ void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched { struct panthor_job *job = container_of(sched_job, struct panthor_job, base); - /* Still not sure why we want USAGE_WRITE for external objects, since I - * was assuming this would be handled through explicit syncs being imported - * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers - * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason. - */ panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); } void panthor_sched_unplug(struct panthor_device *ptdev) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 5bc3e6b41c34..b31125eb9a65 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -27,7 +27,7 @@ #include <linux/slab.h> #include <linux/string_helpers.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <drm/drm_device.h> #include <drm/drm_util.h> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f36aa71c57c7..e5a6f3e7c75b 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -530,6 +530,7 @@ static const struct file_operations radeon_driver_kms_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = radeon_kms_compat_ioctl, #endif + .fop_flags = FOP_UNSIGNED_OFFSET, }; static const struct drm_ioctl_desc radeon_ioctls_kms[] = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index b84451d59187..62e0e2dd3b0c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -103,13 +103,17 @@ static int rockchip_drm_init_iommu(struct drm_device *drm_dev) struct rockchip_drm_private *private = drm_dev->dev_private; struct iommu_domain_geometry *geometry; u64 start, end; + int ret; if (IS_ERR_OR_NULL(private->iommu_dev)) return 0; - private->domain = iommu_domain_alloc(private->iommu_dev->bus); - if (!private->domain) - return -ENOMEM; + private->domain = iommu_paging_domain_alloc(private->iommu_dev); + if (IS_ERR(private->domain)) { + ret = PTR_ERR(private->domain); + private->domain = NULL; + return ret; + } geometry = &private->domain->geometry; start = geometry->aperture_start; diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 58c8161289fe..a75eede8bf8d 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -133,8 +133,10 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, { WARN_ON(!num_sched_list || !sched_list); + spin_lock(&entity->rq_lock); entity->sched_list = sched_list; entity->num_sched_list = num_sched_list; + spin_unlock(&entity->rq_lock); } EXPORT_SYMBOL(drm_sched_entity_modify_sched); @@ -380,7 +382,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, container_of(cb, struct drm_sched_entity, cb); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup(entity->rq->sched, entity); + drm_sched_wakeup(entity->rq->sched); } /** @@ -597,6 +599,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { + struct drm_gpu_scheduler *sched; + struct drm_sched_rq *rq; + /* Add the entity to the run queue */ spin_lock(&entity->rq_lock); if (entity->stopped) { @@ -606,13 +611,16 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) return; } - drm_sched_rq_add_entity(entity->rq, entity); + rq = entity->rq; + sched = rq->sched; + + drm_sched_rq_add_entity(rq, entity); spin_unlock(&entity->rq_lock); if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo(entity, submit_ts); - drm_sched_wakeup(entity->rq->sched, entity); + drm_sched_wakeup(sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index f093616fe53c..03c532590e2a 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1014,15 +1014,12 @@ EXPORT_SYMBOL(drm_sched_job_cleanup); /** * drm_sched_wakeup - Wake up the scheduler if it is ready to queue * @sched: scheduler instance - * @entity: the scheduler entity * * Wake up the scheduler if we can queue jobs. */ -void drm_sched_wakeup(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) +void drm_sched_wakeup(struct drm_gpu_scheduler *sched) { - if (drm_sched_can_queue(sched, entity)) - drm_sched_run_job_queue(sched); + drm_sched_run_job_queue(sched); } /** diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index d9f0728c3afd..c9eb329665ec 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -801,6 +801,7 @@ static const struct file_operations tegra_drm_fops = { .read = drm_read, .compat_ioctl = drm_compat_ioctl, .llseek = noop_llseek, + .fop_flags = FOP_UNSIGNED_OFFSET, }; static int tegra_drm_context_cleanup(int id, void *p, void *data) @@ -1135,6 +1136,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) static int host1x_drm_probe(struct host1x_device *dev) { + struct device *dma_dev = dev->dev.parent; struct tegra_drm *tegra; struct drm_device *drm; int err; @@ -1149,8 +1151,8 @@ static int host1x_drm_probe(struct host1x_device *dev) goto put; } - if (host1x_drm_wants_iommu(dev) && iommu_present(&platform_bus_type)) { - tegra->domain = iommu_domain_alloc(&platform_bus_type); + if (host1x_drm_wants_iommu(dev) && device_iommu_mapped(dma_dev)) { + tegra->domain = iommu_paging_domain_alloc(dma_dev); if (!tegra->domain) { err = -ENOMEM; goto free; diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 5ff1037a3453..62224992988f 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -7,7 +7,7 @@ * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "udl_drv.h" #include "udl_proto.h" diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index ecb80fd75b1a..ebe52bef4ffb 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -291,7 +291,7 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data, else args->timeout_ns = 0; - /* Asked to wait beyond the jiffie/scheduler precision? */ + /* Asked to wait beyond the jiffy/scheduler precision? */ if (ret == -ETIME && args->timeout_ns) ret = -EAGAIN; diff --git a/drivers/gpu/drm/vboxvideo/hgsmi_base.c b/drivers/gpu/drm/vboxvideo/hgsmi_base.c index 8c041d7ce4f1..87dccaecc3e5 100644 --- a/drivers/gpu/drm/vboxvideo/hgsmi_base.c +++ b/drivers/gpu/drm/vboxvideo/hgsmi_base.c @@ -139,7 +139,15 @@ int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags, flags |= VBOX_MOUSE_POINTER_VISIBLE; } - p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len, HGSMI_CH_VBVA, + /* + * The 4 extra bytes come from switching struct vbva_mouse_pointer_shape + * from having a 4 bytes fixed array at the end to using a proper VLA + * at the end. These 4 extra bytes were not subtracted from sizeof(*p) + * before the switch to the VLA, so this way the behavior is unchanged. + * Chances are these 4 extra bytes are not necessary but they are kept + * to avoid regressions. + */ + p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len + 4, HGSMI_CH_VBVA, VBVA_MOUSE_POINTER_SHAPE); if (!p) return -ENOMEM; diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c index d4ade9325401..7f686a0190e6 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_main.c +++ b/drivers/gpu/drm/vboxvideo/vbox_main.c @@ -114,6 +114,10 @@ int vbox_hw_init(struct vbox_private *vbox) DRM_INFO("VRAM %08x\n", vbox->full_vram_size); + ret = pcim_request_region(pdev, 0, "vboxvideo"); + if (ret) + return ret; + /* Map guest-heap at end of vram */ vbox->guest_heap = pcim_iomap_range(pdev, 0, GUEST_HEAP_OFFSET(vbox), GUEST_HEAP_SIZE); diff --git a/drivers/gpu/drm/vboxvideo/vboxvideo.h b/drivers/gpu/drm/vboxvideo/vboxvideo.h index f60d82504da0..79ec8481de0e 100644 --- a/drivers/gpu/drm/vboxvideo/vboxvideo.h +++ b/drivers/gpu/drm/vboxvideo/vboxvideo.h @@ -351,10 +351,8 @@ struct vbva_mouse_pointer_shape { * Bytes in the gap between the AND and the XOR mask are undefined. * XOR mask scanlines have no gap between them and size of XOR mask is: * xor_len = width * 4 * height. - * - * Preallocate 4 bytes for accessing actual data as p->data. */ - u8 data[4]; + u8 data[]; } __packed; /* pointer is visible */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 50ad3105c16e..2825dd3149ed 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1609,6 +1609,7 @@ static const struct file_operations vmwgfx_driver_fops = { .compat_ioctl = vmw_compat_ioctl, #endif .llseek = noop_llseek, + .fop_flags = FOP_UNSIGNED_OFFSET, }; static const struct drm_driver driver = { diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 0d1a4a9f4e11..ac9c437e103d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -105,6 +105,7 @@ #define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define DIS_CLIP_NEGATIVE_BOUNDING_BOX REG_BIT(6) #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) @@ -168,6 +169,8 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) + #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -377,6 +380,9 @@ #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) +#define SCRATCH3_LBCF XE_REG_MCR(0xb154) +#define RWFLUSHALLEN REG_BIT(17) + #define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) @@ -390,6 +396,12 @@ #define SCRATCH1LPFC XE_REG(0xb474) #define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) +#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) + +#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) + +#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 1189f5a540a8..a9b0091cb7ee 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -52,6 +52,7 @@ #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ #define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_PES_DISAG_EN REG_GENMASK(27, 22) #define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) #define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) #define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a13e0b3a169e..ef777dbdf4ec 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -65,7 +65,8 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo); - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END) + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 06911e9a3bf5..e5f51fd23c65 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -680,8 +680,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : - (!mem_type_is_vram(old_mem_type) && !tt_has_data); + move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data)); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); @@ -2320,6 +2320,20 @@ void xe_bo_put_commit(struct llist_head *deferred) drm_gem_object_free(&bo->ttm.base.refcount); } +void xe_bo_put(struct xe_bo *bo) +{ + might_sleep(); + if (bo) { +#ifdef CONFIG_PROC_FS + if (bo->client) + might_lock(&bo->client->bos_lock); +#endif + if (bo->ggtt_node && bo->ggtt_node->ggtt) + might_lock(&bo->ggtt_node->ggtt->lock); + drm_gem_object_put(&bo->ttm.base); + } +} + /** * xe_bo_dumb_create - Create a dumb bo as backing for a fb * @file_priv: ... diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index dbfb3209615d..6e4be52306df 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -126,11 +126,7 @@ static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) return bo; } -static inline void xe_bo_put(struct xe_bo *bo) -{ - if (bo) - drm_gem_object_put(&bo->ttm.base); -} +void xe_bo_put(struct xe_bo *bo); static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) { diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index a64bae36e0e3..668615c6b172 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -6,6 +6,7 @@ #include "xe_debugfs.h" #include <linux/debugfs.h> +#include <linux/fault-inject.h> #include <linux/string_helpers.h> #include <drm/drm_debugfs.h> @@ -26,10 +27,7 @@ #include "xe_vm.h" #endif -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */ DECLARE_FAULT_ATTR(gt_reset_failure); -#endif static struct xe_device *node_to_xe(struct drm_info_node *node) { @@ -213,8 +211,5 @@ void xe_debugfs_register(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); -#ifdef CONFIG_FAULT_INJECTION fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); -#endif - } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1a0d7fdd094b..5a63d135ba96 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -171,10 +171,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); - mutex_unlock(&xef->vm.lock); xe_file_put(xef); @@ -253,6 +251,7 @@ static const struct file_operations xe_driver_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, #endif + .fop_flags = FOP_UNSIGNED_OFFSET, }; static struct drm_driver driver = { @@ -297,6 +296,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); + if (xe->destroy_wq) + destroy_workqueue(xe->destroy_wq); + ttm_device_fini(&xe->ttm); } @@ -335,9 +337,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - err = drmm_mutex_init(&xe->drm, &xe->usm.lock); - if (err) - goto err; + init_rwsem(&xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); @@ -362,8 +362,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); if (!xe->ordered_wq || !xe->unordered_wq || - !xe->preempt_fence_wq) { + !xe->preempt_fence_wq || !xe->destroy_wq) { /* * Cleanup done in xe_device_destroy via * drmm_add_action_or_reset register above diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ec7eb7811126..09d731a9125c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -369,7 +369,7 @@ struct xe_device { /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; /** @usm.lock: protects UM state */ - struct mutex lock; + struct rw_semaphore lock; } usm; /** @pinned: pinned BO state */ @@ -396,6 +396,9 @@ struct xe_device { /** @unordered_wq: used to serialize unordered work, mostly display */ struct workqueue_struct *unordered_wq; + /** @destroy_wq: used to serialize user destroy work, like queue */ + struct workqueue_struct *destroy_wq; + /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; @@ -567,15 +570,23 @@ struct xe_file { struct { /** @vm.xe: xarray to store VMs */ struct xarray xa; - /** @vm.lock: protects file VM state */ + /** + * @vm.lock: Protects VM lookup + reference and removal a from + * file xarray. Not an intended to be an outer lock which does + * thing while being held. + */ struct mutex lock; } vm; /** @exec_queue: Submission exec queue state for file */ struct { - /** @exec_queue.xe: xarray to store engines */ + /** @exec_queue.xa: xarray to store exece queues */ struct xarray xa; - /** @exec_queue.lock: protects file engine state */ + /** + * @exec_queue.lock: Protects exec queue lookup + reference and + * removal a frommfile xarray. Not an intended to be an outer + * lock which does thing while being held. + */ struct mutex lock; } exec_queue; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index e64f4b645e2e..fb52a23e28f8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/types.h> +#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client; + xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -164,12 +168,9 @@ static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { u64 sz = bo->size; - u32 mem_type; + u32 mem_type = bo->ttm.resource->mem_type; - if (bo->placement.placement) - mem_type = bo->placement.placement->mem_type; - else - mem_type = XE_PL_TT; + xe_bo_assert_held(bo); if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) stats[mem_type].shared += sz; @@ -196,6 +197,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type; @@ -206,7 +208,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj); - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock); @@ -215,11 +230,28 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; - bo_meminfo(bo, stats); - xe_bo_put(bo); + + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); + xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue; @@ -251,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) + xa_for_each(&xef->exec_queue.xa, i, q) { + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + xe_exec_queue_update_run_ticks(q); + + mutex_lock(&xef->exec_queue.lock); + xe_exec_queue_put(q); + } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 5a9cbc97f0be..d098d2dd1b2d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -223,8 +223,10 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, gt->usm.reserved_bcs_instance, false); - if (!hwe) + if (!hwe) { + xe_vm_put(migrate_vm); return ERR_PTR(-EINVAL); + } q = xe_exec_queue_create(xe, migrate_vm, BIT(hwe->logical_instance), 1, hwe, @@ -633,14 +635,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } - mutex_lock(&xef->exec_queue.lock); + q->xef = xe_file_get(xef); + + /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); if (err) goto kill_exec_queue; args->exec_queue_id = id; - q->xef = xe_file_get(xef); return 0; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f3fca5565d32..2895f154654c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -619,16 +619,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, bo->ggtt_node = xe_ggtt_node_init(ggtt); if (IS_ERR(bo->ggtt_node)) { err = PTR_ERR(bo->ggtt_node); + bo->ggtt_node = NULL; goto out; } mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (err) + if (err) { xe_ggtt_node_fini(bo->ggtt_node); - else + bo->ggtt_node = NULL; + } else { xe_ggtt_map_bo(ggtt, bo); + } mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index c518d1d16d82..50361b4638f9 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -90,6 +90,11 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) +{ + drm_sched_resume_timeout(&sched->base, sched->base.timeout); +} + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index cee9c6809fc0..5ad5629a6c60 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -22,6 +22,8 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index dd96dec95b19..f0dc2bf24c7b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -12,8 +12,6 @@ #include <generated/xe_wa_oob.h> -#include <generated/xe_wa_oob.h> - #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 8b1a5027dcf2..ee138e9768a2 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_H_ #define _XE_GT_H_ +#include <linux/fault-inject.h> + #include <drm/drm_util.h> #include "xe_device.h" @@ -19,19 +21,11 @@ #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { return should_fail(>_reset_failure, 1); } -#else -static inline bool xe_fault_inject_gt_reset(void) -{ - return false; -} -#endif struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_hwconfig(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 68a5778b4319..ab76973f3e1e 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt) if (!gt->freq) return -ENOMEM; - err = devm_add_action(xe->drm.dev, freq_fini, gt->freq); + err = sysfs_create_files(gt->freq, freq_attrs); if (err) return err; - err = sysfs_create_files(gt->freq, freq_attrs); + err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7d7bd0be6233..c834f64b0178 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -439,7 +439,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); - if (MEDIA_VER(xe) >= 20) { + if (MEDIA_VERx100(xe) >= 1301) { gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table; } else { diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 730eec07795e..79c426dc2505 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -185,6 +185,21 @@ unlock_dma_resv: return err; } +static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); @@ -197,21 +212,20 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) if (pf->trva_fault) return -EFAULT; - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - mutex_unlock(&xe->usm.lock); - if (!vm) - return -EINVAL; + vm = asid_to_vm(xe, pf->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); /* * TODO: Change to read lock? Using write lock for simplicity. */ down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + vma = lookup_vma(vm, pf->page_addr); if (!vma) { err = -EINVAL; @@ -542,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) if (acc->access_type != ACC_TRIGGER) return -EINVAL; - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, acc->asid); - if (vm) - xe_vm_get(vm); - mutex_unlock(&xe->usm.lock); - if (!vm || !xe_vm_in_fault_mode(vm)) - return -EINVAL; + vm = asid_to_vm(xe, acc->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); down_read(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index a95e546b7744..8250ef71e685 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -399,7 +399,7 @@ static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_confi static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = config->ggtt_region; + struct xe_ggtt_node *node; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -411,14 +411,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (xe_ggtt_node_allocated(node)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; - pf_release_ggtt(tile, node); + pf_release_vf_config_ggtt(gt, config); } - xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); if (!size) return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index a05c3699e8b9..ec2b8246204b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt) gt->sysfs = &kg->base; - return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt); + return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index c3e6b51f7a09..42116b167c98 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -18,8 +18,10 @@ */ #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) -#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) -#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) +#define GUC_SUBMIT_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) struct drm_printer; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fbbe6a487bbb..80062e1d3f66 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -276,10 +276,26 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) } #endif +static void xe_guc_submit_fini(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); +} + static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + xe_guc_submit_fini(guc); xa_destroy(&guc->submission_state.exec_queue_lookup); free_submit_wq(guc); } @@ -290,9 +306,15 @@ static void guc_submit_wedged_fini(void *arg) struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_wedged(q)) { + mutex_unlock(&guc->submission_state.lock); xe_exec_queue_put(q); + mutex_lock(&guc->submission_state.lock); + } + } + mutex_unlock(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -345,6 +367,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) xa_init(&guc->submission_state.exec_queue_lookup); + init_waitqueue_head(&guc->submission_state.fini_wq); + primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); @@ -361,6 +385,9 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa xe_guc_id_mgr_release_locked(&guc->submission_state.idm, q->guc->id, q->width); + + if (xa_empty(&guc->submission_state.exec_queue_lookup)) + wake_up(&guc->submission_state.fini_wq); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1268,13 +1295,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else - queue_work(system_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->fini_async); } static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1796,6 +1826,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) } xe_sched_submission_start(sched); + xe_sched_submission_resume_tdr(sched); } int xe_guc_submit_start(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 546ac6350a31..69046f698271 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -81,6 +81,8 @@ struct xe_guc { #endif /** @submission_state.enabled: submission is enabled */ bool enabled; + /** @submission_state.fini_wq: submit fini wait queue */ + wait_queue_head_t fini_wq; } submission_state; /** @hwconfig: Hardware config state */ struct { diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 63286ed8457f..2804f14f8f29 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -440,6 +440,10 @@ static void xe_oa_enable(struct xe_oa_stream *stream) val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + val |= OAG_OACONTROL_OA_PES_DISAG_EN; + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); } @@ -705,8 +709,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), }, }; struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; @@ -738,10 +741,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? CTX_CTRL_RUN_ALONE : 0), + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; @@ -1259,7 +1260,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) static const struct file_operations xe_oa_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .release = xe_oa_release, .poll = xe_oa_poll, .read = xe_oa_read, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 937c3e064f0d..5e962e72c97e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -924,6 +924,8 @@ static int xe_pci_resume(struct device *dev) if (err) return err; + pci_restore_state(pdev); + err = pci_enable_device(pdev); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index e518557e0eec..7cf2160fe040 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -416,7 +416,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); out: if (err) - xe_display_pm_resume(xe, true); + xe_display_pm_runtime_resume(xe); xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; @@ -595,6 +595,22 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) return pm_runtime_get_if_in_use(xe->drm.dev) > 0; } +/* + * Very unreliable! Should only be used to suppress the false positive case + * in the missing outer rpm protection warning. + */ +static bool xe_pm_suspending_or_resuming(struct xe_device *xe) +{ +#ifdef CONFIG_PM + struct device *dev = xe->drm.dev; + + return dev->power.runtime_status == RPM_SUSPENDING || + dev->power.runtime_status == RPM_RESUMING; +#else + return false; +#endif +} + /** * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming * @xe: xe device instance @@ -611,8 +627,11 @@ void xe_pm_runtime_get_noresume(struct xe_device *xe) ref = xe_pm_runtime_get_if_in_use(xe); - if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n")) + if (!ref) { pm_runtime_get_noresume(xe->drm.dev); + drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe), + "Missing outer runtime PM protection\n"); + } } /** diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d6353e8969f0..f27f579f4d85 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2188,5 +2188,5 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) pt_op->num_entries); } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); + xe_pt_update_ops_fini(tile, vops); } diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index faa1bf42e50e..0d5e04158917 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -42,20 +42,48 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), SET(CCCHKNREG1, L3CMPCTRL)) }, + { XE_RTP_NAME("Tuning: Compression Overfetch - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), + SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) + }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) + }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, { XE_RTP_NAME("Tuning: Stateless compression control"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) }, + { XE_RTP_NAME("Tuning: Stateless compression control - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + {} }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7acd5fc9d032..ce9dca4d4e87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1613,7 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); if (vm->usm.asid) { void *lookup; @@ -1623,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); xe_assert(xe, lookup == vm); } - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); @@ -1765,25 +1765,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - mutex_lock(&xef->vm.lock); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); - if (err) - goto err_close_and_put; - if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); if (err < 0) - goto err_free_id; + goto err_close_and_put; vm->usm.asid = asid; } - args->vm_id = id; vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ @@ -1796,12 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); #endif + /* user id alloc must always be last in ioctl to prevent UAF */ + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_close_and_put; + + args->vm_id = id; + return 0; -err_free_id: - mutex_lock(&xef->vm.lock); - xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); err_close_and_put: xe_vm_close_and_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 5bcd59190353..80ba2fc78837 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -182,6 +182,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = offset_hi << 32; /* HW view bits 39:32 */ offset |= offset_lo << 6; /* HW view bits 31:6 */ offset *= num_enabled; /* convert to SW view */ + offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */ /* We don't expect any holes */ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 28b7f95b6c2f..d424992514a4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -733,6 +733,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, {} }; |