diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 571 |
1 files changed, 432 insertions, 139 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2408ed4c7d84..f5e9c022960b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -47,7 +47,7 @@ #include "gfx_v10_0.h" #include "nbio_v2_3.h" -/** +/* * Navi10 has two graphic rings to share each graphic pipe. * 1. Primary ring * 2. Async ring @@ -173,6 +173,9 @@ #define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5 +#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1 + #define GFX_RLCG_GC_WRITE_OLD (0x8 << 28) #define GFX_RLCG_GC_WRITE (0x0 << 28) #define GFX_RLCG_GC_READ (0x1 << 28) @@ -232,6 +235,20 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_me.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1395,9 +1412,10 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), @@ -1415,46 +1433,45 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000) }; -static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write) -{ - /* always programed by rlcg, only for gc */ - if (offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI) || - offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO) || - offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH) || - offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL) || - offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX) || - offset == SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)) { - if (!amdgpu_sriov_reg_indirect_gc(adev)) - *flag = GFX_RLCG_GC_WRITE_OLD; - else - *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; +static bool gfx_v10_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, + int write, u32 *rlcg_flag) +{ + switch (hwip) { + case GC_HWIP: + if (amdgpu_sriov_reg_indirect_gc(adev)) { + *rlcg_flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; - return true; - } + return true; + /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */ + } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ)) { + *rlcg_flag = GFX_RLCG_GC_WRITE_OLD; - /* currently support gc read/write, mmhub write */ - if (offset >= SOC15_REG_OFFSET(GC, 0, mmSDMA0_DEC_START) && - offset <= SOC15_REG_OFFSET(GC, 0, mmRLC_GTS_OFFSET_MSB)) { - if (amdgpu_sriov_reg_indirect_gc(adev)) - *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; - else - return false; - } else { - if (amdgpu_sriov_reg_indirect_mmhub(adev)) - *flag = GFX_RLCG_MMHUB_WRITE; - else - return false; + return true; + } + + break; + case MMHUB_HWIP: + if (amdgpu_sriov_reg_indirect_mmhub(adev) && + (acc_flags & AMDGPU_REGS_RLC) && write) { + *rlcg_flag = GFX_RLCG_MMHUB_WRITE; + return true; + } + + break; + default: + DRM_DEBUG("Not program register by RLCG\n"); } - return true; + return false; } static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag) @@ -1478,8 +1495,15 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4; scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4; - spare_int = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4; + + if (adev->asic_type >= CHIP_SIENNA_CICHLID) { + spare_int = adev->rmmio + + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX] + + mmRLC_SPARE_INT_0_Sienna_Cichlid) * 4; + } else { + spare_int = adev->rmmio + + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4; + } grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; @@ -1514,36 +1538,34 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 return ret; } -static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 flag) +static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) { - uint32_t rlcg_flag; + u32 rlcg_flag; - if (amdgpu_sriov_fullaccess(adev) && - gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 1)) { + if (!amdgpu_sriov_runtime(adev) && + gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) { gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag); - return; } - if (flag & AMDGPU_REGS_NO_KIQ) + + if (acc_flags & AMDGPU_REGS_NO_KIQ) WREG32_NO_KIQ(offset, value); else WREG32(offset, value); } -static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 flag) +static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) { - uint32_t rlcg_flag; + u32 rlcg_flag; - if (amdgpu_sriov_fullaccess(adev) && - gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 0)) + if (!amdgpu_sriov_runtime(adev) && + gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag)) return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag); - if (flag & AMDGPU_REGS_NO_KIQ) + if (acc_flags & AMDGPU_REGS_NO_KIQ) return RREG32_NO_KIQ(offset); else return RREG32(offset); - - return 0; } static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = @@ -3365,6 +3387,30 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), }; +static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), @@ -3404,6 +3450,41 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) }; +static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xb0000ff0, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff000000, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX,0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3619,11 +3700,21 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_vangogh, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh)); break; + case CHIP_YELLOW_CARP: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_3, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_3)); + break; case CHIP_DIMGREY_CAVEFISH: soc15_program_register_sequence(adev, golden_settings_gc_10_3_4, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); break; + case CHIP_BEIGE_GOBY: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_5, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5)); + break; default: break; } @@ -3809,6 +3900,8 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.cp_fw_write_wait = true; break; default: @@ -3885,7 +3978,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[40]; - char wks[10]; + char *wks = ""; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -3898,7 +3991,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); - memset(wks, 0, sizeof(wks)); switch (adev->asic_type) { case CHIP_NAVI10: chip_name = "navi10"; @@ -3907,7 +3999,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) chip_name = "navi14"; if (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)) - snprintf(wks, sizeof(wks), "_wks"); + wks = "_wks"; break; case CHIP_NAVI12: chip_name = "navi12"; @@ -3924,6 +4016,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: chip_name = "dimgrey_cavefish"; break; + case CHIP_BEIGE_GOBY: + chip_name = "beige_goby"; + break; + case CHIP_YELLOW_CARP: + chip_name = "yellow_carp"; + break; default: BUG(); } @@ -4492,6 +4590,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4616,6 +4716,8 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4848,7 +4950,8 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; - if ((adev->asic_type == CHIP_SIENNA_CICHLID) && + if (((adev->asic_type == CHIP_SIENNA_CICHLID) || + (adev->asic_type == CHIP_YELLOW_CARP)) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); @@ -4983,47 +5086,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) 4 + /* RMI */ 1); /* SQG */ - if (adev->asic_type == CHIP_NAVI10 || - adev->asic_type == CHIP_NAVI14 || - adev->asic_type == CHIP_NAVI12) { - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); - wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); - /* - * Set corresponding TCP bits for the inactive WGPs in - * GCRD_SA_TARGETS_DISABLE - */ - gcrd_targets_disable_tcp = 0; - /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ - utcl_invreq_disable = 0; - - for (k = 0; k < max_wgp_per_sh; k++) { - if (!(wgp_active_bitmap & (1 << k))) { - gcrd_targets_disable_tcp |= 3 << (2 * k); - utcl_invreq_disable |= (3 << (2 * k)) | - (3 << (2 * (max_wgp_per_sh + k))); - } + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + /* + * Set corresponding TCP bits for the inactive WGPs in + * GCRD_SA_TARGETS_DISABLE + */ + gcrd_targets_disable_tcp = 0; + /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ + utcl_invreq_disable = 0; + + for (k = 0; k < max_wgp_per_sh; k++) { + if (!(wgp_active_bitmap & (1 << k))) { + gcrd_targets_disable_tcp |= 3 << (2 * k); + gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2)); + utcl_invreq_disable |= (3 << (2 * k)) | + (3 << (2 * (max_wgp_per_sh + k))); } - - tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); - /* only override TCP & SQC bits */ - tmp &= 0xffffffff << (4 * max_wgp_per_sh); - tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); - WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); - - tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); - /* only override TCP bits */ - tmp &= 0xffffffff << (2 * max_wgp_per_sh); - tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); - WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); } - } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (4 * max_wgp_per_sh)); + tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); + WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (3 * max_wgp_per_sh)); + tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); + WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); + } } + + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); } static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@ -5178,10 +5278,10 @@ static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev) uint32_t tmp; /* enable Save Restore Machine */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmRLC_SRM_CNTL); tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); + WREG32_SOC15(GC, 0, mmRLC_SRM_CNTL, tmp); } static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) @@ -6123,6 +6223,8 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -6258,6 +6360,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6270,6 +6374,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -6366,6 +6472,8 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -7080,6 +7188,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -7093,6 +7202,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) } break; case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); @@ -7114,6 +7224,9 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) { uint32_t data; + if (amdgpu_sriov_vf(adev)) + return; + /* initialize cam_index to 0 * index will auto-inc after each data writting */ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); @@ -7123,6 +7236,8 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -7286,7 +7401,10 @@ static int gfx_v10_0_hw_init(void *handle) * init golden registers and rlc resume may override some registers, * reconfig them here */ - gfx_v10_0_tcp_harvest(adev); + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) + gfx_v10_0_tcp_harvest(adev); r = gfx_v10_0_cp_resume(adev); if (r) @@ -7332,7 +7450,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - if (!adev->in_pci_err_recovery) { + if (!adev->no_hw_access) { #ifndef BRING_UP_DEBUG if (amdgpu_async_gfx_ring) { r = gfx_v10_0_kiq_disable_kgq(adev); @@ -7347,9 +7465,15 @@ static int gfx_v10_0_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) { gfx_v10_0_cp_gfx_enable(adev, false); /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ - tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); - tmp &= 0xffffff00; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + if (adev->asic_type >= CHIP_SIENNA_CICHLID) { + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); + tmp &= 0xffffff00; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + } else { + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + } return 0; } @@ -7432,6 +7556,8 @@ static int gfx_v10_0_soft_reset(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, @@ -7485,6 +7611,7 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) mutex_lock(&adev->gfx.gpu_clock_mutex); switch (adev->asic_type) { case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) | ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; @@ -7541,6 +7668,8 @@ static int gfx_v10_0_early_init(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -7597,6 +7726,8 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7631,6 +7762,8 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -7644,6 +7777,9 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) + return; + /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 0 - Disable some blocks' MGCG */ @@ -7658,6 +7794,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) @@ -7680,13 +7817,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - } else { + } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | - RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -7712,22 +7851,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS))) + return; + /* Enable 3D CGCG/CGLS */ - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + if (enable) { /* write cmd to clear cgcg/cgls ov */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + /* enable 3Dcgcg FSM(0x0000363f) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7740,9 +7891,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, } else { /* Disable CGCG/CGLS */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + /* disable cgcg, cgls should be disabled */ - data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7754,25 +7910,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS))) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; - else - data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7784,8 +7950,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); } else { def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ - data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7797,7 +7969,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset FGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; @@ -7828,6 +8003,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, } } +static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + uint32_t reg_idx = 0; + uint32_t i; + + const uint32_t tcp_ctrl_regs[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG + }; + + const uint32_t tcp_ctrl_regs_nv12[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + }; + + const uint32_t sm_ctlr_regs[] = { + mmCGTS_SA0_QUAD0_SM_CTRL_REG, + mmCGTS_SA0_QUAD1_SM_CTRL_REG, + mmCGTS_SA1_QUAD0_SM_CTRL_REG, + mmCGTS_SA1_QUAD1_SM_CTRL_REG + }; + + if (adev->asic_type == CHIP_NAVI12) { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs_nv12[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } else { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } + + for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] + + sm_ctlr_regs[i]; + reg_data = RREG32(reg_idx); + reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK; + reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT; + WREG32(reg_idx, reg_data); + } +} + static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -7844,6 +8110,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v10_0_update_3d_clock_gating(adev, enable); /* === CGCG + CGLS === */ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + + if ((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12)) + gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS * === CGCG + CGLS === @@ -7873,12 +8143,12 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { u32 reg, data; - + /* not for *_SOC15 */ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); else - data = RREG32(reg); + data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; @@ -7933,12 +8203,23 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) * in refclk count. Note that RLC FW is modified to take 16 bits from * RLC_PG_DELAY_3[15:0] as the hysteresis instead of just 8 bits. * - * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us(0x4E20) - * as part of CGPG enablement starting point. + * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us as part) + * of CGPG enablement starting point. + * Power/performance team will optimize it and might give a new value later. */ - if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && adev->asic_type == CHIP_VANGOGH) { - data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; - WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { + switch (adev->asic_type) { + case CHIP_VANGOGH: + data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; + WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); + break; + case CHIP_YELLOW_CARP: + data = 0x1388 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; + WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); + break; + default: + break; + } } } @@ -7998,9 +8279,11 @@ static int gfx_v10_0_set_powergating_state(void *handle, case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: amdgpu_gfx_off_ctrl(adev, enable); break; case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: gfx_v10_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -8026,6 +8309,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -8618,16 +8903,16 @@ gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 0); - WREG32(cp_int_cntl_reg, cp_int_cntl); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 1); - WREG32(cp_int_cntl_reg, cp_int_cntl); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); break; default: break; @@ -8671,16 +8956,16 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); break; default: break; @@ -8876,20 +9161,20 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, GENERIC2_INT_ENABLE, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - tmp = RREG32(target); + tmp = RREG32_SOC15_IP(GC, target); tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, GENERIC2_INT_ENABLE, 0); - WREG32(target, tmp); + WREG32_SOC15_IP(GC, target, tmp); } else { tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, GENERIC2_INT_ENABLE, 1); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - tmp = RREG32(target); + tmp = RREG32_SOC15_IP(GC, target); tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(target, tmp); + WREG32_SOC15_IP(GC, target, tmp); } break; default: @@ -9132,13 +9417,15 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov; break; default: @@ -9174,17 +9461,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device * static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) { - u32 data, wgp_bitmask; - data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + u32 disabled_mask = + ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + u32 efuse_setting = 0; + u32 vbios_setting = 0; + + efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; - data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - wgp_bitmask = - amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + disabled_mask |= efuse_setting | vbios_setting; - return (~data) & wgp_bitmask; + return (~disabled_mask); } static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) @@ -9221,7 +9513,8 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; - if ((adev->asic_type == CHIP_SIENNA_CICHLID) && + if (((adev->asic_type == CHIP_SIENNA_CICHLID) || + (adev->asic_type == CHIP_YELLOW_CARP)) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; |