diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
34 files changed, 1265 insertions, 562 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index fa26a4e3a99d..4536c8ad0e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -98,7 +98,7 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 1afbb2e932c6..f5d0fa207a88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1782,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name); + amdgpu_vm_put_task_info(ti); + } - seq_printf(m, "pid:%d\tProcess:%s ----------\n", - vm->task_info.pid, vm->task_info.process_name); r = amdgpu_bo_reserve(vm->root.bo, true); if (r) break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e68bd6f8a6a4..1e9454e6e4cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4056,13 +4056,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto unmap_memory; } + amdgpu_device_set_mcbp(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) goto unmap_memory; - amdgpu_device_set_mcbp(adev); - /* Get rid 
of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 502333725b49..a07e4b87d4ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -61,6 +61,7 @@ #include "nbio_v4_3.h" #include "nbio_v7_2.h" #include "nbio_v7_7.h" +#include "nbif_v6_3_1.h" #include "hdp_v5_0.h" #include "hdp_v5_2.h" #include "hdp_v6_0.h" @@ -1319,6 +1320,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } } + if (le16_to_cpu(ip->hw_id) == VPE_HWID) { + if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES) + adev->vpe.num_instances++; + else + dev_err(adev->dev, "Too many VPE instances: %d vs %d\n", + adev->vpe.num_instances + 1, + AMDGPU_MAX_VPE_INSTANCES); + } + if (le16_to_cpu(ip->hw_id) == UMC_HWID) { adev->gmc.num_umc++; adev->umc.node_inst_num++; @@ -1936,6 +1946,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): if (amdgpu_sriov_vf(adev)) amdgpu_discovery_set_sriov_display(adev); else @@ -2212,6 +2223,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); break; default: @@ -2558,6 +2570,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; + case IP_VERSION(6, 3, 1): + adev->nbio.funcs = &nbif_v6_3_1_funcs; + adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 
6acffedf648c..15b188aaf681 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2693,7 +2693,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } adev->in_runpm = true; - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* @@ -2703,7 +2703,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) * platforms. * TODO: this may be also needed for PX capable platform. */ - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_UNLOAD; ret = amdgpu_device_prepare(drm_dev); @@ -2712,15 +2712,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; return ret; } - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. 
*/ @@ -2729,9 +2729,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { amdgpu_device_baco_enter(drm_dev); } @@ -2754,7 +2754,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!pci_device_is_present(adev->pdev)) adev->no_hw_access = true; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* Only need to handle PCI state in the driver for ATPX @@ -2766,22 +2766,22 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (ret) return ret; pci_set_master(pdev); - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. 
*/ pci_set_master(pdev); - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) pci_disable_device(pdev); return ret; } - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; adev->in_runpm = false; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 22aeee8adb71..67c234bcf89f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -208,9 +208,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, if (!WARN_ON(!vm->process_info->eviction_fence)) { r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT, &vm->process_info->eviction_fence->base); - if (r) - dev_warn(adev->dev, "%d: validate_and_fence failed: %d\n", - vm->task_info.pid, r); + if (r) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + dev_warn(adev->dev, "validate_and_fence failed: %d\n", r); + if (ti) { + dev_warn(adev->dev, "pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + } } mutex_unlock(&vm->process_info->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6aa3b1d845ab..8b512dc28df8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; bool need_ctx_switch; - unsigned int patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; @@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, bool secure, init_shadow; u64 
shadow_va, csa_va, gds_va; int vmid = AMDGPU_JOB_GET_VMID(job); + bool need_pipe_sync = false; + unsigned int cond_exec; unsigned int i; int r = 0; - bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; @@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow, vmid); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + cond_exec = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); amdgpu_device_flush_hdp(adev, ring); @@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - if (ring->funcs->emit_gfx_shadow) { + if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) { amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); - - if (ring->funcs->init_cond_exec) { - unsigned int ce_offset = ~0; - - ce_offset = amdgpu_ring_init_cond_exec(ring); - if (ce_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, ce_offset); - } + amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } r = amdgpu_fence_emit(ring, f, job, fence_flags); @@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (ring->funcs->insert_end) ring->funcs->insert_end(ring); - if (patch_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, cond_exec); ring->current_ctx = fence_ctx; if (vm && ring->funcs->emit_switch_buffer) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 71a5cf37b472..4b3000c21ef2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); - struct amdgpu_task_info ti; + 
struct amdgpu_task_info *ti; struct amdgpu_device *adev = ring->adev; int idx; int r; @@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && @@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) goto exit; } - amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti.process_name, ti.tgid, ti.task_name, ti.pid); + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); + + ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); + if (ti) { + DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } dma_fence_set_error(&s_job->s_fence->finished, -ETIME); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 4baa300121d8..147100c27c2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -196,6 +196,13 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->reset_task_info.process_name, coredump->reset_task_info.pid); + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); if (coredump->adev->reset_info.num_regs) { @@ -220,6 +227,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, { struct amdgpu_coredump_info *coredump; struct 
drm_device *dev = adev_to_drm(adev); + struct amdgpu_job *job = reset_context->job; + struct drm_sched_job *s_job; coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); @@ -230,8 +239,21 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, coredump->reset_vram_lost = vram_lost; - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm = reset_context->job->vm; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info = *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job = &job->base; + coredump->ring = to_amdgpu_ring(s_job->sched); + } coredump->adev = adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 19899f6b9b2b..60522963aaca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -97,6 +97,7 @@ struct amdgpu_coredump_info { struct amdgpu_task_info reset_task_info; struct timespec64 reset_time; bool reset_vram_lost; + struct amdgpu_ring *ring; }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index fe1a61eb6e4c..582053f1cd56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -209,8 +209,7 @@ struct amdgpu_ring_funcs { void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); - unsigned (*init_cond_exec)(struct amdgpu_ring *ring); - void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); + unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr); /* note usage for clock and power gating */ void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); @@ -286,6 +285,9 @@ struct 
amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned int set_q_mode_offs; + volatile u32 *set_q_mode_ptr; + u64 set_q_mode_token; unsigned vm_hub; unsigned vm_inv_eng; struct dma_fence *vmid_wait; @@ -327,8 +329,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) -#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) -#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) +#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) @@ -411,6 +412,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +/** + * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute + * @ring: amdgpu_ring structure + * @offset: offset returned by amdgpu_ring_init_cond_exec + * + * Calculate the dw count and patch it into a cond_exec command. + */ +static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned int offset) +{ + unsigned cur; + + if (!ring->funcs->init_cond_exec) + return; + + WARN_ON(offset > ring->buf_mask); + WARN_ON(ring->ring[offset] != 0); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur < offset) + cur += ring->ring_size >> 2; + ring->ring[offset] = cur - offset; +} + #define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ (ring->is_mes_queue && ring->mes_ctx ? 
\ (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ed4a8c5d26d7..4299ce386322 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -513,8 +513,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, bo = bo_base->bo; if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { - pr_warn_ratelimited("Evicted user BO is not reserved in pid %d\n", - vm->task_info.pid); + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + pr_warn_ratelimited("Evicted user BO is not reserved\n"); + if (ti) { + pr_warn_ratelimited("pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + return -EINVAL; } @@ -652,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool vm_flush_needed = job->vm_needs_flush; struct dma_fence *fence = NULL; bool pasid_mapping_needed = false; - unsigned patch_offset = 0; + unsigned int patch; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { @@ -679,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + patch = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); @@ -727,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } dma_fence_put(fence); - if (ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, patch); /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ if (ring->funcs->emit_switch_buffer) { @@ -1385,10 +1391,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && - mapping->start < AMDGPU_GMC_HOLE_START) - init_pte_value = 
AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_update_range(adev, vm, false, false, true, false, resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, @@ -2225,6 +2227,108 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); } +static void amdgpu_vm_destroy_task_info(struct kref *kref) +{ + struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount); + + kfree(ti); +} + +static inline struct amdgpu_vm * +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); + + return vm; +} + +/** + * amdgpu_vm_put_task_info - reference down the vm task_info ptr + * + * @task_info: task_info struct under discussion. + * + * frees the vm task_info ptr at the last put + */ +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) +{ + kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); +} + +/** + * amdgpu_vm_get_task_info_vm - Extracts task info for a vm. + * + * @vm: VM to get info from + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm) +{ + struct amdgpu_task_info *ti = NULL; + + if (vm) { + ti = vm->task_info; + kref_get(&vm->task_info->refcount); + } + + return ti; +} + +/** + * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID. + * + * @adev: drm device pointer + * @pasid: PASID identifier for VM + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. 
+ */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid) +{ + return amdgpu_vm_get_task_info_vm( + amdgpu_vm_get_vm_from_pasid(adev, pasid)); +} + +static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm) +{ + vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL); + if (!vm->task_info) + return -ENOMEM; + + kref_init(&vm->task_info->refcount); + return 0; +} + +/** + * amdgpu_vm_set_task_info - Sets VMs task info. + * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ + if (!vm->task_info) + return; + + if (vm->task_info->pid == current->pid) + return; + + vm->task_info->pid = current->pid; + get_task_comm(vm->task_info->task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info->tgid = current->group_leader->pid; + get_task_comm(vm->task_info->process_name, current->group_leader); +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -2264,7 +2368,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - vm->pte_support_ats = false; vm->is_compute_context = false; vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2311,6 +2414,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_create_task_info(vm); + if (r) + DRM_DEBUG("Failed to create task info for VM\n"); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2350,30 +2457,12 @@ error_free_delayed: */ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; r = amdgpu_bo_reserve(vm->root.bo, true); if (r) return r; - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. 
- */ - if (pte_support_ats != vm->pte_support_ats) { - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - - vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), - false); - if (r) - goto unreserve_bo; - } - /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); @@ -2450,6 +2539,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); + amdgpu_vm_put_task_info(vm->task_info); amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -2607,48 +2697,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** - * amdgpu_vm_get_task_info - Extracts task info for a PASID. - * - * @adev: drm device pointer - * @pasid: PASID identifier for VM - * @task_info: task_info to fill. - */ -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info) -{ - struct amdgpu_vm *vm; - unsigned long flags; - - xa_lock_irqsave(&adev->vm_manager.pasids, flags); - - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) - *task_info = vm->task_info; - - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); -} - -/** - * amdgpu_vm_set_task_info - Sets VMs task info. - * - * @vm: vm for which to set the info - */ -void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) -{ - if (vm->task_info.pid) - return; - - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); - - if (current->group_leader->mm != current->mm) - return; - - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); -} - -/** * amdgpu_vm_handle_fault - graceful handling of VM faults. 
* @adev: amdgpu device pointer * @pasid: PASID of the VM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 42f6ddec50c1..047ec1930d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -203,10 +203,11 @@ struct amdgpu_vm_pte_funcs { }; struct amdgpu_task_info { - char process_name[TASK_COMM_LEN]; - char task_name[TASK_COMM_LEN]; - pid_t pid; - pid_t tgid; + char process_name[TASK_COMM_LEN]; + char task_name[TASK_COMM_LEN]; + pid_t pid; + pid_t tgid; + struct kref refcount; }; /** @@ -357,9 +358,6 @@ struct amdgpu_vm { /* Functions to use for VM table updates */ const struct amdgpu_vm_update_funcs *update_funcs; - /* Flag to indicate ATS support from PTE for GFX9 */ - bool pte_support_ats; - /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); @@ -373,7 +371,7 @@ struct amdgpu_vm { uint64_t pd_phys_addr; /* Some basic info about the task */ - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; /* Store positions of group of BOs */ struct ttm_lru_bulk_move lru_bulk_move; @@ -514,8 +512,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info); +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid); + +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); + +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); + bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, bool write_fault); @@ -533,8 +537,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int level, bool immediate, struct amdgpu_bo_vm **vmbo, int32_t xcp_id); void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, 
struct amdgpu_vm *vm); -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, struct amdgpu_vm_bo_base *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a160265ddc07..124389a6bf48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -90,22 +90,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, } /** - * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned int shift; - - shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT * * @adev: amdgpu_device pointer @@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_vm_update_params params; struct amdgpu_bo *ancestor = &vmbo->bo; - unsigned int entries, ats_entries; + unsigned int entries; struct amdgpu_bo *bo = &vmbo->bo; uint64_t addr; int r, idx; @@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, } entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= - ats_entries) { - 
ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) @@ -445,44 +408,24 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto exit; addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - flags = AMDGPU_PTE_DEFAULT_ATC; + uint64_t value = 0, flags = 0; + if (adev->asic_type >= CHIP_VEGA10) { if (level != AMDGPU_VM_PTB) { /* Handle leaf PDEs as PTEs */ flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, - ats_entries, value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; } - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; r = vm->update_funcs->commit(¶ms, NULL); exit: @@ -728,33 +671,6 @@ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_pt_is_root_clean - check if a root PD is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * Check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM - * - * Returns: - * 0 if this VM is clean - */ -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - enum amdgpu_vm_level root = 
adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); - unsigned int i = 0; - - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return false; - } - return true; -} - -/** * amdgpu_vm_pde_update - update a single level in the hierarchy * * @params: parameters for the update @@ -1027,7 +943,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.tgid, + vm->task_info ? vm->task_info->tgid : 0, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index b9a15d51eb5c..70c5cc80ecdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -297,6 +297,10 @@ static int vpe_early_init(void *handle) case IP_VERSION(6, 1, 0): vpe_v6_1_set_funcs(vpe); break; + case IP_VERSION(6, 1, 1): + vpe_v6_1_set_funcs(vpe); + vpe->collaborate_mode = true; + break; default: return -EINVAL; } @@ -304,6 +308,8 @@ static int vpe_early_init(void *handle) vpe_set_ring_funcs(adev); vpe_set_regs(vpe); + dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? 
"true" : "false"); + return 0; } @@ -457,6 +463,18 @@ static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid) return csa_mc_addr; } +static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring, + uint32_t device_select, + uint32_t exec_count) +{ + if (!ring->adev->vpe.collaborate_mode) + return; + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) | + (device_select << 16)); + amdgpu_ring_write(ring, exec_count & 0x1fff); +} + static void vpe_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -505,6 +523,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; + vpe_ring_emit_pred_exec(ring, 0, 6); + /* wait for idle */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | @@ -520,6 +540,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { + vpe_ring_emit_pred_exec(ring, 0, 3); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, val); @@ -528,6 +550,8 @@ static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { + vpe_ring_emit_pred_exec(ring, 0, 6); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ @@ -546,34 +570,24 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } -static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; + if 
(ring->adev->vpe.collaborate_mode) + return ~0; + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0); return ret; } -static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - WARN_ON_ONCE(offset > ring->buf_mask); - WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -695,16 +709,22 @@ static void vpe_ring_set_wptr(struct amdgpu_ring *ring) upper_32_bits(ring->wptr << 2)); atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + if (vpe->collaborate_mode) + WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2); } else { - dev_dbg(adev->dev, "Not using doorbell, \ - regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ - regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo), - lower_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi), - upper_32_bits(ring->wptr << 2)); + int i; + + for (i = 0; i < vpe->num_instances; i++) { + dev_dbg(adev->dev, "Not using doorbell, \ + regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ + regVPEC_QUEUE0_RB_WPTR_HI == 
0x%08x\n", + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo), + lower_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi), + upper_32_bits(ring->wptr << 2)); + } } } @@ -864,7 +884,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .test_ring = vpe_ring_test_ring, .test_ib = vpe_ring_test_ib, .init_cond_exec = vpe_ring_init_cond_exec, - .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, .begin_use = vpe_ring_begin_use, .end_use = vpe_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index 1153ddaea64d..231d86d0953e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -27,6 +27,8 @@ #include "amdgpu_irq.h" #include "vpe_6_1_fw_if.h" +#define AMDGPU_MAX_VPE_INSTANCES 2 + struct amdgpu_vpe; struct vpe_funcs { @@ -74,6 +76,9 @@ struct amdgpu_vpe { uint32_t *cmdbuf_cpu_addr; struct delayed_work idle_work; bool context_started; + + uint32_t num_instances; + bool collaborate_mode; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 691fa40e4e01..904b9ff5ead2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -8542,34 +8542,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } -static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard 
following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; @@ -9224,7 +9213,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v10_0_ring_preempt_ib, .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, .emit_wreg = gfx_v10_0_ring_emit_wreg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0d90d60a21d6..1770e496c1b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5461,6 +5461,11 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); } + + /* Make sure that we can't skip the SET_Q_MODE packets when the VM + * changed in any way. 
+ */ + ring->set_q_mode_ptr = NULL; } static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, @@ -5510,16 +5515,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } +static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); + ret = ring->wptr & ring->buf_mask; + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va, u64 gds_va, bool init_shadow, int vmid) { struct amdgpu_device *adev = ring->adev; + unsigned int offs, end; - if (!adev->gfx.cp_gfx_shadow) + if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) return; + /* + * The logic here isn't easy to understand because we need to keep state + * across multiple executions of the function as well as between the + * CPU and GPU. The general idea is that the newly written GPU command + * has a condition on the previous one and only executed if really + * necessary. + */ + + /* + * The dw in the NOP controls if the next SET_Q_MODE packet should be + * executed or not. Reserve 64bits just to be on the safe side. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); + offs = ring->wptr & ring->buf_mask; + + /* + * We start with skipping the prefix SET_Q_MODE and always executing + * the postfix SET_Q_MODE packet. This is changed below with a + * WRITE_DATA command when the postfix executed. + */ + amdgpu_ring_write(ring, shadow_va ? 
1 : 0); + amdgpu_ring_write(ring, 0); + + if (ring->set_q_mode_offs) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += ring->set_q_mode_offs << 2; + end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); + } + + /* + * When the postfix SET_Q_MODE packet executes we need to make sure that the + * next prefix SET_Q_MODE packet executes as well. + */ + if (!shadow_va) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += offs << 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 0x1); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); amdgpu_ring_write(ring, lower_32_bits(shadow_va)); amdgpu_ring_write(ring, upper_32_bits(shadow_va)); @@ -5531,33 +5601,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); amdgpu_ring_write(ring, init_shadow ? PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); -} -static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) -{ - unsigned ret; + if (ring->set_q_mode_offs) + amdgpu_ring_patch_cond_exec(ring, end); - amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ - ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + if (shadow_va) { + uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; - return ret; -} + /* + * If the tokens match try to skip the last postfix SET_Q_MODE + * packet to avoid saving/restoring the state all the time. 
+ */ + if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) + *ring->set_q_mode_ptr = 0; -static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); + ring->set_q_mode_token = token; + } else { + ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; + } - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; + ring->set_q_mode_offs = offs; } static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) @@ -6124,7 +6187,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_frame_size = /* totally 247 maximum if 16 IBs */ 5 + /* update_spm_vmid */ 5 + /* COND_EXEC */ - 9 + /* SET_Q_PREEMPTION_MODE */ + 22 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + @@ -6137,6 +6200,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ + 22 + /* SET_Q_PREEMPTION_MODE */ 8 + 8 + /* FENCE x2 */ 8, /* gfx_v11_0_emit_mem_sync */ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ @@ -6153,7 +6217,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v11_0_ring_preempt_ib, .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, .emit_wreg = gfx_v11_0_ring_emit_wreg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b97ea62212b6..202ddda57f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6326,33 +6326,22 @@ static 
void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr & ring->buf_mask) - 1; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; -} - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -6932,7 +6921,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v8_ring_emit_sb, .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7669f82aa1da..6f97a6d0e6d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; /* disable scheduler on the real ring */ - ring->no_scheduler = true; + ring->no_scheduler = adev->gfx.mcbp; ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, @@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle) } /* set up the software rings */ - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { ring = &adev->gfx.sw_gfx_ring[i]; ring->ring_obj = NULL; @@ -2180,7 +2180,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); amdgpu_ring_mux_fini(&adev->gfx.muxer); @@ -5610,31 +5610,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void 
gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size>>2) - offset + cur; -} - static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -5909,11 +5899,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - if (adev->gfx.num_gfx_rings && - !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { - /* Fence signals are handled on the software rings*/ - for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) - amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + if (adev->gfx.num_gfx_rings) { + if (!adev->gfx.mcbp) { + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + } } break; case 1: @@ -6908,7 +6901,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v9_0_ring_preempt_ib, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, @@ -6963,7 +6955,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 
@@ -7050,7 +7041,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index db89d13bd80d..d933e19e0cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -105,7 +105,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index]; bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; uint32_t status = 0; u64 addr; @@ -157,18 +157,22 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? 
"mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n", - addr, entry->client_id, - soc15_ih_clientid_name[entry->client_id]); + addr, entry->client_id, + soc15_ih_clientid_name[entry->client_id]); if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index a3812f0036a0..527dc917e049 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -126,19 +126,24 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, } if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + struct amdgpu_task_info *task_info; dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? 
"mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 969a9e867170..d20e5f20ee31 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1445,18 +1445,24 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, gmc_v8_0_set_fault_enable_default(adev, false); if (printk_ratelimit()) { - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", + entry->src_id, entry->src_data[0]); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } - dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", - entry->src_id, entry->src_data[0], task_info.process_name, - task_info.tgid, task_info.task_name, task_info.pid); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - addr); + addr); dev_err(adev->dev, " 
VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); + gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client, entry->pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1439e62e9378..47b63a4ce68b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -549,7 +549,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); uint32_t status = 0, cid = 0, rw = 0; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; const char *hub_name; @@ -626,15 +626,20 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", - hub_name, retry_fault ? "retry" : "no-retry", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name, + retry_fault ? 
"retry" : "no-retry", + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n", addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c new file mode 100644 index 000000000000..96ed00ac81ac --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -0,0 +1,495 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbif_v6_3_1.h" + +#include "nbif/nbif_6_3_1_offset.h" +#include "nbif/nbif_6_3_1_sh_mask.h" +#include "pcie/pcie_6_1_0_offset.h" +#include "pcie/pcie_6_1_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); +} + +static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); +} + +static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ + if (instance == 0) { + u32 doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWID, + 0xe); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + 
S2A_DOORBELL_PORT2_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE, + 0x3); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range); + } +} + +static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index, + int instance) +{ + u32 doorbell_range; + + if (instance) + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL); + else + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWID, + instance ? 0x7 : 0x4); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 8); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE, + instance ? 
0x7 : 0x4); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 0); + + if (instance) + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range); + else + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range); +} + +static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007); + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d); +} + +static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, + BIF_DOORBELL_APER_EN, enable ? 1 : 0); +} + +static void +nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); +} + +static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_ENABLE, + 0x1); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + 
GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWID, + 0x0); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 2); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, + 0x0); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range); +} + +static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL); + /* + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl); +} + +static void +nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ +} + +static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + 
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); +} + +static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2); + data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); +} + +static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev) +{ + u32 data, rom_offset; + + data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL); + rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET); + + return rom_offset; +} + +#ifdef CONFIG_PCIEASPM +static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = 
RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data = 0x35EB; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2); + data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + if (adev->pdev->ltr_path) + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + else + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} +#endif + +static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) +{ +#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data &= 
~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + +#if 0 + /* regPSWUSP0_PCIE_LC_CNTL2 should be replace by PCIE_LC_CNTL2 or someone else ? */ + def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data); +#endif + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4); + data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL); + data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data); + + nbif_v6_3_1_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= 0x9 << 
PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); +#endif +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, + .program_aspm = nbif_v6_3_1_program_aspm, +}; + + +static void nbif_v6_3_1_sriov_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ +} + +static void nbif_v6_3_1_sriov_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ +} + +static void 
nbif_v6_3_1_sriov_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, + int doorbell_index, int instance) +{ +} + +static void nbif_v6_3_1_sriov_gc_doorbell_init(struct amdgpu_device *adev) +{ +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sriov_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_sriov_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_sriov_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_sriov_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h new file mode 100644 index 000000000000..b7f2e0d88905 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NBIO_V6_3_1_H__ +#define __NBIO_V6_3_1_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 3d68dd5523c6..43775cb67ff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2104,7 +2104,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_0_irq_id_to_seq(entry->client_id); @@ -2116,15 +2116,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, + " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index fec5a3d1c4bc..eaa4f5f49949 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1644,7 +1644,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); @@ -1656,15 +1656,19 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3c485e5a531a..883e8a1b8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -249,35 +249,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) return ret; } -static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - 
amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_0_ring_get_rptr - get the current read pointer * @@ -1780,7 +1768,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 0058f3f7cf6e..42f4bd250def 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -89,35 +89,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, 
lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_2_ring_get_rptr - get the current read pointer * @@ -1722,7 +1710,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_2_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec, .preempt_ib = sdma_v5_2_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 4874ded45653..361835a61f2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -80,35 +80,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, 
lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v6_0_ring_get_rptr - get the current read pointer * @@ -1542,7 +1530,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v6_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec, .preempt_ib = sdma_v6_0_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index b92fd6760fa9..581a3bd11481 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -388,6 +388,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h index 9b550deb48d3..47534dbbd137 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h 
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h @@ -40,7 +40,8 @@ enum VPE_CMD_OPCODE { VPE_CMD_OPCODE_POLL_REGMEM = 0x8, VPE_CMD_OPCODE_COND_EXE = 0x9, VPE_CMD_OPCODE_ATOMIC = 0xA, - VPE_CMD_OPCODE_PLANE_FILL = 0xB, + VPE_CMD_OPCODE_PRED_EXE = 0xB, + VPE_CMD_OPCODE_COLLAB_SYNC = 0xC, VPE_CMD_OPCODE_TIMESTAMP = 0xD }; diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index d20060a51e05..769eb8f7bb3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -33,14 +33,38 @@ #include "vpe/vpe_6_1_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin"); #define VPE_THREAD1_UCODE_OFFSET 0x8000 +#define regVPEC_COLLABORATE_CNTL 0x0013 +#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L + +#define regVPEC_COLLABORATE_CFG 0x0014 +#define regVPEC_COLLABORATE_CFG_BASE_IDX 0 +#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0 +#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3 +#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4 +#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7 +#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L +#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L +#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L +#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L + +#define regVPEC_CNTL_6_1_1 0x0016 +#define regVPEC_CNTL_6_1_1_BASE_IDX 0 +#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c +#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0 +#define regVPEC_PUB_DUMMY2_6_1_1 0x004c +#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0 + static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset) { uint32_t base; - base = vpe->ring.adev->reg_offset[VPE_HWIP][0][0]; + base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0]; return base + offset; } @@ -48,12 +72,14 @@ static 
uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, u static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t f32_cntl; + uint32_t i, f32_cntl; - f32_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl); + for (i = 0; i < vpe->num_instances; i++) { + f32_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL), f32_cntl); + } } static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) @@ -70,20 +96,58 @@ static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) return 0; } +static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t vpe_colla_cntl, vpe_colla_cfg, i; + + if (!vpe->collaborate_mode) + return; + + for (i = 0; i < vpe->num_instances; i++) { + vpe_colla_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL)); + vpe_colla_cntl = REG_SET_FIELD(vpe_colla_cntl, VPEC_COLLABORATE_CNTL, + COLLABORATE_MODE_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL), vpe_colla_cntl); + + vpe_colla_cfg = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG)); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_EN, enable ? 1 : 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, enable ? 
1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG), vpe_colla_cfg); + } +} + static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; const struct vpe_firmware_header_v1_0 *vpe_hdr; const __le32 *data; uint32_t ucode_offset[2], ucode_size[2]; - uint32_t i, size_dw; + uint32_t i, j, size_dw; uint32_t ret; - // disable UMSCH_INT_ENABLE - ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); - ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret); + /* disable UMSCH_INT_ENABLE */ + for (j = 0; j < vpe->num_instances; j++) { + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1)); + else + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL)); + + ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); + } + + /* + * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. + * Here use instance 0 as master. 
+ */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { uint32_t f32_offset, f32_cntl; @@ -96,8 +160,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; amdgpu_vpe_psp_update_sram(adev); - - /* Config DPM */ + vpe_v6_1_set_collaborate_mode(vpe, true); amdgpu_vpe_configure_dpm(vpe); return 0; @@ -114,25 +177,26 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) vpe_v6_1_halt(vpe, true); - for (i = 0; i < 2; i++) { - if (i > 0) - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); - else - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), 0); - - data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); - size_dw = ucode_size[i] / sizeof(__le32); - - while (size_dw--) { - if (amdgpu_emu_mode && size_dw % 500 == 0) - msleep(1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + for (j = 0; j < vpe->num_instances; j++) { + for (i = 0; i < 2; i++) { + if (i > 0) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0); + + data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); + size_dw = ucode_size[i] / sizeof(__le32); + + while (size_dw--) { + if (amdgpu_emu_mode && size_dw % 500 == 0) + msleep(1); + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + } } - } vpe_v6_1_halt(vpe, false); - /* Config DPM */ + vpe_v6_1_set_collaborate_mode(vpe, true); amdgpu_vpe_configure_dpm(vpe); return 0; @@ -142,68 +206,68 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) { struct amdgpu_ring *ring = &vpe->ring; struct amdgpu_device *adev = ring->adev; - uint32_t rb_bufsz, rb_cntl; - uint32_t ib_cntl; uint32_t doorbell, doorbell_offset; + uint32_t rb_bufsz, rb_cntl; + uint32_t ib_cntl, i; int ret; - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, 
regVPEC_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_HI), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), 0); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); - - ring->wptr = 0; - - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); - - doorbell = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL)); - doorbell_offset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET)); - - doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 
1 : 0); - doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL), doorbell); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - adev->nbio.funcs->vpe_doorbell_range(adev, 0, ring->use_doorbell, ring->doorbell_index, 2); - - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); - - ring->sched.ready = true; + for (i = 0; i < vpe->num_instances; i++) { + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0); + + /* set the wb address whether it's enabled or not */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + + rb_cntl = 
REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); + + doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET)); + doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL)); + doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 
1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell); + + adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + } ret = amdgpu_ring_test_helper(ring); - if (ret) { - ring->sched.ready = false; + if (ret) return ret; - } return 0; } @@ -211,17 +275,30 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t queue_reset; + uint32_t queue_reset, i; int ret; - queue_reset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ)); - queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ), queue_reset); + for (i = 0; i < vpe->num_instances; i++) { + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1)); + else + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ)); + + queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } else { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 
0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } - ret = SOC15_WAIT_ON_RREG(VPE, 0, regVPEC_QUEUE_RESET_REQ, 0, - VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); - if (ret) - dev_err(adev->dev, "VPE queue reset failed\n"); + if (ret) + dev_err(adev->dev, "VPE queue reset failed\n"); + } vpe->ring.sched.ready = false; @@ -236,10 +313,18 @@ static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_vpe *vpe = &adev->vpe; uint32_t vpe_cntl; - vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1)); + else + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1), vpe_cntl); + else + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); return 0; } @@ -264,13 +349,19 @@ static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev, static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) { + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR; vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI; vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR; vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; - vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2_6_1_1; + else + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; |