diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 393 |
1 files changed, 218 insertions, 175 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 52a647d7022d..e0302c23e9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_ring.h" #include "vi.h" #include "vi_structs.h" #include "vid.h" @@ -729,8 +730,13 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); +#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK 0x0000007fL +#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT 0x00000000L + static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { + uint32_t data; + switch (adev->asic_type) { case CHIP_TOPAZ: amdgpu_device_program_register_sequence(adev, @@ -790,11 +796,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_device_program_register_sequence(adev, polaris10_golden_common_all, ARRAY_SIZE(polaris10_golden_common_all)); - WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); - if (adev->pdev->revision == 0xc7 && + data = RREG32_SMC(ixCG_ACLK_CNTL); + data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK; + data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT; + WREG32_SMC(ixCG_ACLK_CNTL, data); + if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) && ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || - (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { + (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) { amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); } @@ -888,7 +897,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, &ib); + r = amdgpu_ib_get(adev, NULL, 16, + AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -1318,6 +1328,10 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return r; } + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + return 0; } @@ -1338,21 +1352,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) amdgpu_gfx_compute_queue_acquire(adev); mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + return r; + } - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.mec.hpd_eop_obj, - &adev->gfx.mec.hpd_eop_gpu_addr, - (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - - memset(hpd, 0, mec_hpd_size); + memset(hpd, 0, mec_hpd_size); - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } return 0; } @@ -1546,7 +1561,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* allocate an indirect buffer to put the commands in */ memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, total_size, &ib); + r = amdgpu_ib_get(adev, NULL, total_size, + AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); return r; @@ -1677,7 +1693,7 @@ fail: static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; int ret; @@ -1817,10 +1833,14 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; if (adev->flags & AMD_IS_APU) { @@ -1884,6 +1904,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int r; unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + unsigned int hw_prio; ring = &adev->gfx.compute_ring[ring_id]; @@ -1903,9 +1924,11 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); if (r) return r; @@ -2009,7 +2032,8 @@ static int gfx_v8_0_sw_init(void *handle) } r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) return r; } @@ -3236,7 +3260,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) dev_warn(adev->dev, "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", adev->asic_type); - /* fall through */ + fallthrough; case CHIP_CARRIZO: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | @@ -3663,6 +3687,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +#define DEFAULT_SH_MEM_BASES (0x6000) /** * gfx_v8_0_init_compute_vmid - gart enable * @@ -3671,9 +3696,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) * Initialize compute vmid sh_mem registers * */ -#define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -3696,7 +3718,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__PRIVATE_ATC_MASK; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -3709,7 +3731,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); @@ -3727,7 +3749,7 @@ static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -4112,7 +4134,6 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { - int i; u32 tmp = RREG32(mmCP_ME_CNTL); if (enable) { @@ -4123,8 +4144,6 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].sched.ready = false; } WREG32(mmCP_ME_CNTL, tmp); udelay(50); @@ -4312,14 +4331,10 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) { - int i; - if (enable) { WREG32(mmCP_MEC_CNTL, 0); } else { WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].sched.ready = false; adev->gfx.kiq.ring.sched.ready = false; } udelay(50); @@ -4422,6 +4437,19 @@ static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) return r; } +static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; + mqd->cp_hqd_queue_priority = + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; + } + } +} + static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -4545,9 +4573,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) /* defaults */ mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); - mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); - mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); - mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); @@ -4559,13 +4584,20 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* set static priority for a queue/ring */ + gfx_v8_0_mqd_set_priority(ring, mqd); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } -int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, +static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd) { uint32_t mqd_reg; @@ -4610,7 +4642,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) gfx_v8_0_kiq_setting(ring); - if (adev->in_gpu_reset) { /* for GPU_RESET case */ + if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); @@ -4647,7 +4679,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) struct vi_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->in_gpu_reset && !adev->in_suspend) { + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -4659,7 +4691,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); - } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ + } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); @@ -5035,7 +5067,7 @@ static int gfx_v8_0_pre_soft_reset(void *handle) gfx_v8_0_cp_compute_enable(adev, false); } - return 0; + return 0; } static int gfx_v8_0_soft_reset(void *handle) @@ -5247,6 +5279,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -5272,7 +5305,8 @@ static int gfx_v8_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); adev->gfx.funcs = &gfx_v8_0_gfx_funcs; gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); @@ -5320,10 +5354,9 @@ static int gfx_v8_0_late_init(void *handle) static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, bool enable) { - if (((adev->asic_type == CHIP_POLARIS11) || + if ((adev->asic_type == CHIP_POLARIS11) || (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) && - adev->powerplay.pp_funcs->set_powergating_by_smu) + (adev->asic_type == CHIP_VEGAM)) /* Send msg to SMU via Powerplay */ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); @@ -5587,6 +5620,24 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev) } } +static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +{ + u32 data; + + if (amdgpu_sriov_is_pp_one_vf(adev)) + data = RREG32_NO_KIQ(mmRLC_SPM_VMID); + else + data = RREG32(mmRLC_SPM_VMID); + + data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; + + if (amdgpu_sriov_is_pp_one_vf(adev)) + WREG32_NO_KIQ(mmRLC_SPM_VMID, data); + else + WREG32(mmRLC_SPM_VMID, data); +} + static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { .is_rlc_enabled = gfx_v8_0_is_rlc_enabled, .set_safe_mode = gfx_v8_0_set_safe_mode, @@ -5598,7 +5649,8 @@ static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { .resume = gfx_v8_0_rlc_resume, .stop = gfx_v8_0_rlc_stop, .reset = gfx_v8_0_rlc_reset, - .start = gfx_v8_0_rlc_start + .start = gfx_v8_0_rlc_start, + .update_spm_vmid = gfx_v8_0_update_spm_vmid }; static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -5838,8 +5890,7 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_CG, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { @@ -5860,8 +5911,7 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_MG, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } return 0; @@ -5890,8 +5940,7 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_CG, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { @@ -5910,8 +5959,7 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_3D, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { @@ -5932,8 +5980,7 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_MG, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { @@ -5948,8 +5995,7 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_RLC, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { @@ -5963,8 +6009,7 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, PP_BLOCK_GFX_CP, pp_support_state, pp_state); - if (adev->powerplay.pp_funcs->set_clockgating_by_smu) - amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); + amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); } return 0; @@ -6092,7 +6137,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); - if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) gfx_v8_0_ring_emit_de_meta(ring); } @@ -6234,104 +6279,6 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } -static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, - bool acquire) -{ - struct amdgpu_device *adev = ring->adev; - int pipe_num, tmp, reg; - int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; - - pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; - - /* first me only has 2 entries, GFX and HP3D */ - if (ring->me > 0) - pipe_num -= 2; - - reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; - tmp = RREG32(reg); - tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); - WREG32(reg, tmp); -} - -static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - bool acquire) -{ - int i, pipe; - bool reserve; - struct amdgpu_ring *iring; - - mutex_lock(&adev->gfx.pipe_reserve_mutex); - pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); - if (acquire) - set_bit(pipe, adev->gfx.pipe_reserve_bitmap); - else - clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); - - if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { - /* Clear all reservations - everyone reacquires all resources */ - for (i = 0; i < adev->gfx.num_gfx_rings; ++i) - gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], - true); - - for (i = 0; i < adev->gfx.num_compute_rings; ++i) - gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], - true); - } else { - /* Lower all pipes without a current reservation */ - for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { - iring = &adev->gfx.gfx_ring[i]; - pipe = amdgpu_gfx_mec_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); - reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); - gfx_v8_0_ring_set_pipe_percent(iring, reserve); - } - - for (i = 0; i < adev->gfx.num_compute_rings; ++i) { - iring = &adev->gfx.compute_ring[i]; - pipe = amdgpu_gfx_mec_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); - reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); - gfx_v8_0_ring_set_pipe_percent(iring, reserve); - } - } - - mutex_unlock(&adev->gfx.pipe_reserve_mutex); -} - -static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - bool acquire) -{ - uint32_t pipe_priority = acquire ? 0x2 : 0x0; - uint32_t queue_priority = acquire ? 0xf : 0x0; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - - WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); - WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); - - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} -static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, - enum drm_sched_priority priority) -{ - struct amdgpu_device *adev = ring->adev; - bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; - - if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) - return; - - gfx_v8_0_hqd_set_priority(adev, ring, acquire); - gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); -} - static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) @@ -6444,7 +6391,8 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; } -static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) +static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs) { struct amdgpu_device *adev = ring->adev; @@ -6455,9 +6403,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + reg_val_offs * 4)); } static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, @@ -6771,7 +6719,8 @@ static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } -static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) +static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data, + bool from_wq) { u32 enc, se_id, sh_id, cu_id; char type[20]; @@ -6809,7 +6758,7 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) * or from BH in which case we can access SQ_EDC_INFO * instance */ - if (in_task()) { + if (from_wq) { mutex_lock(&adev->grbm_idx_mutex); gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); @@ -6847,7 +6796,7 @@ static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); struct sq_work *sq_work = container_of(work, struct sq_work, work); - gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); + gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true); } static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, @@ -6862,7 +6811,7 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, * just print whatever info is possible directly from the ISR. */ if (work_pending(&adev->gfx.sq_work.work)) { - gfx_v8_0_parse_sq_irq(adev, ih_data); + gfx_v8_0_parse_sq_irq(adev, ih_data, false); } else { adev->gfx.sq_work.ih_data = ih_data; schedule_work(&adev->gfx.sq_work.work); @@ -6871,6 +6820,94 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA | + PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + +static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA | + PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + + +/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ +#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT 0x0000007f +static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring, + uint32_t pipe, bool enable) +{ + uint32_t val; + uint32_t wcl_cs_reg; + + val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT; + + switch (pipe) { + case 0: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0; + break; + case 1: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1; + break; + case 2: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2; + break; + case 3: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3; + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + + amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); + +} + +#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff +static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + int i; + + /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit + * number of gfx waves. Setting 5 bit will make sure gfx only gets + * around 25% of gpu resources. + */ + val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; + amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val); + + /* Restrict waves for normal/low priority compute queues as well + * to get best QoS for high priority compute jobs. + * + * amdgpu controls only 1st ME(0-3 CS pipes). + */ + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + if (i != ring->pipe) + gfx_v8_0_emit_wave_limit_cs(ring, i, enable); + + } + +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6917,7 +6954,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 12 + 12 + /* FENCE x2 */ - 2, /* SWITCH_BUFFER */ + 2 + /* SWITCH_BUFFER */ + 5, /* SURFACE_SYNC */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, @@ -6935,6 +6973,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, + .emit_mem_sync = gfx_v8_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -6951,7 +6990,10 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 5 + /* hdp_invalidate */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ - 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7 + /* gfx_v8_0_emit_mem_sync_compute */ + 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ + 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, @@ -6963,8 +7005,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, - .set_priority = gfx_v8_0_ring_set_priority_compute, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, + .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { |