diff options
author | Dave Airlie <airlied@redhat.com> | 2016-05-06 14:17:22 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2016-05-06 14:17:22 +1000 |
commit | a64424d722504926f3375bc4887976e3bfe3a01d (patch) | |
tree | abf5419151be98a8520e30b1ae7935127a678796 /drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |
parent | 0552f7651bc233e5407ab06ba97a9d7c25e19580 (diff) | |
parent | 84fae133f0ccc974a425eee21101f5644bd8d14d (diff) |
Merge branch 'drm-next-4.7' of git://people.freedesktop.org/~agd5f/linux into drm-next
This is the first big radeon/amdgpu pull request for 4.7. Highlights:
- Polaris support in amdgpu
Current display stack on par with other asics, for advanced features DAL is required
Power management support
Support for GFX, Compute, SDMA, UVD, VCE
- VCE and UVD init/fini cleanup in radeon
- GPUVM improvements
- Scheduler improvements
- Clockgating improvements
- Powerplay improvements
- TTM changes to support driver specific LRU update mechanism
- Radeon support for new Mesa features
- ASYNC pageflip support for radeon
- Lots of bug fixes and code cleanups
* 'drm-next-4.7' of git://people.freedesktop.org/~agd5f/linux: (180 commits)
drm/amdgpu: Replace rcu_assign_pointer() with RCU_INIT_POINTER()
drm/amdgpu: use drm_mode_vrefresh() rather than mode->vrefresh
drm/amdgpu/uvd6: add bypass support for fiji (v3)
drm/amdgpu/fiji: set UVD CG state when enabling UVD DPM (v2)
drm/powerplay: add missing clockgating callback for tonga
drm/amdgpu: Constify some tables
drm/amd/powerplay: Delete dead struct declaration
drm/amd/powerplay/hwmgr: don't add invalid voltage
drm/amd/powerplay/hwmgr: prevent VDDC from exceeding 2V
MAINTAINERS: Remove unneded wildcard for the Radeon/AMDGPU drivers
drm/radeon: add cayman VM support for append packet.
drm/amd/amdgpu: Add debugfs entries for smc/didt/pcie
drm/amd/amdgpu: Drop print_status callbacks.
drm/amd/powerplay: revise reading/writing pptable on Polaris10
drm/amd/powerplay: revise reading/writing pptable on Tonga
drm/amd/powerplay: revise reading/writing pptable on Fiji
drm/amd/powerplay: revise caching the soft pptable and add it's size
drm/amd/powerplay: add dpm force multiple levels on cz/tonga/fiji/polaris (v2)
drm/amd/powerplay: fix fan speed percent setting error on Polaris10
drm/amd/powerplay: fix bug dpm can't work when resume back on Polaris
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1563 |
1 files changed, 1310 insertions, 253 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index f0c7b3596480..021c17e50d51 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -27,6 +27,7 @@ #include "vi.h" #include "vid.h" #include "amdgpu_ucode.h" +#include "amdgpu_atombios.h" #include "clearstate_vi.h" #include "gmc/gmc_8_2_d.h" @@ -51,6 +52,7 @@ #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 +#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) @@ -84,6 +86,8 @@ enum { BPM_REG_FGCG_MAX }; +#define RLC_FormatDirectRegListLength 14 + MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); @@ -117,6 +121,20 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, @@ -247,6 +265,64 @@ static const u32 tonga_mgcg_cgcg_init[] = mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, }; +static const u32 golden_settings_polaris11_a11[] = +{ + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x07180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, +}; + +static const u32 polaris11_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, + mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, +}; + +static const u32 golden_settings_polaris10_a11[] = +{ + mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, + mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x07180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, +}; + +static const u32 polaris10_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, + mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, +}; + static const u32 fiji_golden_common_all[] = { mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, @@ -558,6 +634,8 @@ static const u32 stoney_mgcg_cgcg_init[] = static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); +static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); +static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -596,6 +674,22 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) tonga_golden_common_all, (const u32)ARRAY_SIZE(tonga_golden_common_all)); break; + case CHIP_POLARIS11: + amdgpu_program_register_sequence(adev, + golden_settings_polaris11_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_program_register_sequence(adev, + polaris11_golden_common_all, + (const u32)ARRAY_SIZE(polaris11_golden_common_all)); + break; + case CHIP_POLARIS10: + amdgpu_program_register_sequence(adev, + golden_settings_polaris10_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_program_register_sequence(adev, + polaris10_golden_common_all, + (const u32)ARRAY_SIZE(polaris10_golden_common_all)); + break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, cz_mgcg_cgcg_init, @@ -747,6 +841,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL, i; DRM_DEBUG("\n"); @@ -763,6 +859,12 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; + case CHIP_POLARIS10: + chip_name = "polaris10"; + break; case CHIP_STONEY: chip_name = "stoney"; break; @@ -808,9 +910,49 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -911,6 +1053,153 @@ out: return err; } +static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - + PACKET3_SET_CONTEXT_REG_START); + switch (adev->asic_type) { + case CHIP_TONGA: + case CHIP_POLARIS10: + buffer[count++] = cpu_to_le32(0x16000012); + buffer[count++] = cpu_to_le32(0x0000002A); + break; + case CHIP_POLARIS11: + buffer[count++] = cpu_to_le32(0x16000012); + buffer[count++] = cpu_to_le32(0x00000000); + break; + case CHIP_FIJI: + buffer[count++] = cpu_to_le32(0x3a00161a); + buffer[count++] = cpu_to_le32(0x0000002e); + break; + case CHIP_TOPAZ: + case CHIP_CARRIZO: + buffer[count++] = cpu_to_le32(0x00000002); + buffer[count++] = cpu_to_le32(0x00000000); + break; + case CHIP_STONEY: + buffer[count++] = cpu_to_le32(0x00000000); + buffer[count++] = cpu_to_le32(0x00000000); + break; + default: + buffer[count++] = cpu_to_le32(0x00000000); + buffer[count++] = cpu_to_le32(0x00000000); + break; + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) +{ + int r; + + /* clear state block */ + if (adev->gfx.rlc.clear_state_obj) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); + adev->gfx.rlc.clear_state_obj = NULL; + } +} + +static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = vi_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* clear state block */ + adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); + + if (adev->gfx.rlc.clear_state_obj == NULL) { + r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, + NULL, NULL, + &adev->gfx.rlc.clear_state_obj); + if (r) { + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + } + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) { + gfx_v8_0_rlc_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_gpu_addr); + if (r) { + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + gfx_v8_0_get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } + + return 0; +} + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { int r; @@ -1296,12 +1585,13 @@ fail: return r; } -static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) +static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; u32 mc_shared_chmap, mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; + int ret; switch (adev->asic_type) { case CHIP_TOPAZ: @@ -1338,6 +1628,34 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; break; + case CHIP_POLARIS11: + ret = amdgpu_atombios_get_gfx_info(adev); + if (ret) + return ret; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_POLARIS10: + ret = amdgpu_atombios_get_gfx_info(adev); + if (ret) + return ret; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; + break; case CHIP_TONGA: adev->gfx.config.max_shader_engines = 4; adev->gfx.config.max_tile_pipes = 8; @@ -1520,6 +1838,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) break; } adev->gfx.config.gb_addr_config = gb_addr_config; + + return 0; } static int gfx_v8_0_sw_init(void *handle) @@ -1553,6 +1873,12 @@ static int gfx_v8_0_sw_init(void *handle) return r; } + r = gfx_v8_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + r = gfx_v8_0_mec_init(adev); if (r) { DRM_ERROR("Failed to init MEC BOs!\n"); @@ -1570,7 +1896,7 @@ static int gfx_v8_0_sw_init(void *handle) ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; } - r = amdgpu_ring_init(adev, ring, 1024 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, PACKET3(PACKET3_NOP, 0x3FFF), 0xf, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, AMDGPU_RING_TYPE_GFX); @@ -1594,10 +1920,10 @@ static int gfx_v8_0_sw_init(void *handle) ring->me = 1; /* first MEC */ ring->pipe = i / 8; ring->queue = i % 8; - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, PACKET3(PACKET3_NOP, 0x3FFF), 0xf, &adev->gfx.eop_irq, irq_type, AMDGPU_RING_TYPE_COMPUTE); @@ -1629,7 +1955,9 @@ static int gfx_v8_0_sw_init(void *handle) adev->gfx.ce_ram_size = 0x8000; - gfx_v8_0_gpu_early_init(adev); + r = gfx_v8_0_gpu_early_init(adev); + if (r) + return r; return 0; } @@ -1650,6 +1978,10 @@ static int gfx_v8_0_sw_fini(void *handle) gfx_v8_0_mec_fini(adev); + gfx_v8_0_rlc_fini(adev); + + kfree(adev->gfx.rlc.register_list_format); + return 0; } @@ -2219,6 +2551,410 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); break; + case CHIP_POLARIS11: + modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P4_16x16)); + modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + + mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + + mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); + + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); + + break; + case CHIP_POLARIS10: + modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); + modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + + mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + + mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); + + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); + + break; case CHIP_STONEY: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | PIPE_CONFIG(ADDR_SURF_P2) | @@ -2788,6 +3524,188 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32(mmCP_INT_CNTL_RING0, tmp); } +static void gfx_v8_0_init_csb(struct amdgpu_device *adev) +{ + /* csib */ + WREG32(mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32(mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32(mmRLC_CSIB_LENGTH, + adev->gfx.rlc.clear_state_size); +} + +static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, + int ind_offset, + int list_size, + int *unique_indices, + int *indices_count, + int max_indices, + int *ind_start_offsets, + int *offset_count, + int max_offset) +{ + int indices; + bool new_entry = true; + + for (; ind_offset < list_size; ind_offset++) { + + if (new_entry) { + new_entry = false; + ind_start_offsets[*offset_count] = ind_offset; + *offset_count = *offset_count + 1; + BUG_ON(*offset_count >= max_offset); + } + + if (register_list_format[ind_offset] == 0xFFFFFFFF) { + new_entry = true; + continue; + } + + ind_offset += 2; + + /* look for the matching indice */ + for (indices = 0; + indices < *indices_count; + indices++) { + if (unique_indices[indices] == + register_list_format[ind_offset]) + break; + } + + if (indices >= *indices_count) { + unique_indices[*indices_count] = + register_list_format[ind_offset]; + indices = *indices_count; + *indices_count = *indices_count + 1; + BUG_ON(*indices_count >= max_indices); + } + + register_list_format[ind_offset] = indices; + } +} + +static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) +{ + int i, temp, data; + int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; + int indices_count = 0; + int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int offset_count = 0; + + int list_size; + unsigned int *register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + if (register_list_format == NULL) + return -ENOMEM; + memcpy(register_list_format, adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes); + + gfx_v8_0_parse_ind_reg_list(register_list_format, + RLC_FormatDirectRegListLength, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indices, + &indices_count, + sizeof(unique_indices) / sizeof(int), + indirect_start_offsets, + &offset_count, + sizeof(indirect_start_offsets)/sizeof(int)); + + /* save and restore list */ + temp = RREG32(mmRLC_SRM_CNTL); + temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + WREG32(mmRLC_SRM_CNTL, temp); + + WREG32(mmRLC_SRM_ARAM_ADDR, 0); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); + + /* indirect list */ + WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); + for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); + + list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; + list_size = list_size >> 1; + WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); + WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); + + /* starting offsets starts */ + WREG32(mmRLC_GPM_SCRATCH_ADDR, + adev->gfx.rlc.starting_offsets_start); + for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + WREG32(mmRLC_GPM_SCRATCH_DATA, + indirect_start_offsets[i]); + + /* unique indices */ + temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; + data = mmRLC_SRM_INDEX_CNTL_DATA_0; + for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { + amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); + amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); + } + kfree(register_list_format); + + return 0; +} + +static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32(mmRLC_SRM_CNTL); + data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(mmRLC_SRM_CNTL, data); +} + +static void polaris11_init_power_gating(struct amdgpu_device *adev) +{ + uint32_t data; + + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG)) { + data = RREG32(mmCP_RB_WPTR_POLL_CNTL); + data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; + data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + WREG32(mmCP_RB_WPTR_POLL_CNTL, data); + + data = 0; + data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); + WREG32(mmRLC_PG_DELAY, data); + + data = RREG32(mmRLC_PG_DELAY_2); + data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; + data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); + WREG32(mmRLC_PG_DELAY_2, data); + + data = RREG32(mmRLC_AUTO_PG_CTRL); + data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; + data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); + WREG32(mmRLC_AUTO_PG_CTRL, data); + } +} + +static void gfx_v8_0_init_pg(struct amdgpu_device *adev) +{ + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GDS | + AMD_PG_SUPPORT_RLC_SMU_HS)) { + gfx_v8_0_init_csb(adev); + gfx_v8_0_init_save_restore_list(adev); + gfx_v8_0_enable_save_restore_machine(adev); + + if (adev->asic_type == CHIP_POLARIS11) + polaris11_init_power_gating(adev); + } +} + void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32(mmRLC_CNTL); @@ -2858,12 +3776,17 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32(mmRLC_CGCG_CGLS_CTRL, 0); + if (adev->asic_type == CHIP_POLARIS11 || + adev->asic_type == CHIP_POLARIS10) + WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0); /* disable PG */ WREG32(mmRLC_PG_CNTL, 0); gfx_v8_0_rlc_reset(adev); + gfx_v8_0_init_pg(adev); + if (!adev->pp_enabled) { if (!adev->firmware.smu_load) { /* legacy rlc firmware loading */ @@ -3035,9 +3958,14 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); switch (adev->asic_type) { case CHIP_TONGA: + case CHIP_POLARIS10: amdgpu_ring_write(ring, 0x16000012); amdgpu_ring_write(ring, 0x0000002A); break; + case CHIP_POLARIS11: + amdgpu_ring_write(ring, 0x16000012); + amdgpu_ring_write(ring, 0x00000000); + break; case CHIP_FIJI: amdgpu_ring_write(ring, 0x3a00161a); amdgpu_ring_write(ring, 0x0000002e); @@ -3122,6 +4050,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, ring->doorbell_index); tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 1); } else { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, @@ -3679,7 +4609,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) if (use_doorbell) { if ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) { + (adev->asic_type == CHIP_STONEY) || + (adev->asic_type == CHIP_POLARIS11) || + (adev->asic_type == CHIP_POLARIS10)) { WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, @@ -3713,7 +4645,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); mqd->cp_hqd_persistent_state = tmp; - if (adev->asic_type == CHIP_STONEY) { + if (adev->asic_type == CHIP_STONEY || + adev->asic_type == CHIP_POLARIS11 || + adev->asic_type == CHIP_POLARIS10) { tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); @@ -3845,6 +4779,9 @@ static int gfx_v8_0_hw_fini(void *handle) gfx_v8_0_rlc_stop(adev); gfx_v8_0_cp_compute_fini(adev); + amdgpu_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + return 0; } @@ -3889,185 +4826,6 @@ static int gfx_v8_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void gfx_v8_0_print_status(void *handle) -{ - int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "GFX 8.x registers\n"); - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); - - for (i = 0; i < 32; i++) { - dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n", - i, RREG32(mmGB_TILE_MODE0 + (i * 4))); - } - for (i = 0; i < 16; i++) { - dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n", - i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4))); - } - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - dev_info(adev->dev, " se: %d\n", i); - gfx_v8_0_select_se_sh(adev, i, 0xffffffff); - dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n", - RREG32(mmPA_SC_RASTER_CONFIG)); - dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n", - RREG32(mmPA_SC_RASTER_CONFIG_1)); - } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); - - dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n", - RREG32(mmGB_ADDR_CONFIG)); - dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n", - RREG32(mmHDP_ADDR_CONFIG)); - dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", - RREG32(mmDMIF_ADDR_CALC)); - - dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", - RREG32(mmCP_MEQ_THRESHOLDS)); - dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n", - RREG32(mmSX_DEBUG_1)); - dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n", - RREG32(mmTA_CNTL_AUX)); - dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n", - RREG32(mmSPI_CONFIG_CNTL)); - dev_info(adev->dev, " SQ_CONFIG=0x%08X\n", - RREG32(mmSQ_CONFIG)); - dev_info(adev->dev, " DB_DEBUG=0x%08X\n", - RREG32(mmDB_DEBUG)); - dev_info(adev->dev, " DB_DEBUG2=0x%08X\n", - RREG32(mmDB_DEBUG2)); - dev_info(adev->dev, " DB_DEBUG3=0x%08X\n", - RREG32(mmDB_DEBUG3)); - dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n", - RREG32(mmCB_HW_CONTROL)); - dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n", - RREG32(mmSPI_CONFIG_CNTL_1)); - dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n", - RREG32(mmPA_SC_FIFO_SIZE)); - dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n", - RREG32(mmVGT_NUM_INSTANCES)); - dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n", - RREG32(mmCP_PERFMON_CNTL)); - dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n", - RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS)); - dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n", - RREG32(mmVGT_CACHE_INVALIDATION)); - dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n", - RREG32(mmVGT_GS_VERTEX_REUSE)); - dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n", - RREG32(mmPA_SC_LINE_STIPPLE_STATE)); - dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n", - RREG32(mmPA_CL_ENHANCE)); - dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n", - RREG32(mmPA_SC_ENHANCE)); - - dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n", - RREG32(mmCP_ME_CNTL)); - dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n", - RREG32(mmCP_MAX_CONTEXT)); - dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n", - RREG32(mmCP_ENDIAN_SWAP)); - dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n", - RREG32(mmCP_DEVICE_ID)); - - dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n", - RREG32(mmCP_SEM_WAIT_TIMER)); - - dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n", - RREG32(mmCP_RB_WPTR_DELAY)); - dev_info(adev->dev, " CP_RB_VMID=0x%08X\n", - RREG32(mmCP_RB_VMID)); - dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", - RREG32(mmCP_RB0_CNTL)); - dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n", - RREG32(mmCP_RB0_WPTR)); - dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n", - RREG32(mmCP_RB0_RPTR_ADDR)); - dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n", - RREG32(mmCP_RB0_RPTR_ADDR_HI)); - dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", - RREG32(mmCP_RB0_CNTL)); - dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n", - RREG32(mmCP_RB0_BASE)); - dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n", - RREG32(mmCP_RB0_BASE_HI)); - dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n", - RREG32(mmCP_MEC_CNTL)); - dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n", - RREG32(mmCP_CPF_DEBUG)); - - dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n", - RREG32(mmSCRATCH_ADDR)); - dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n", - RREG32(mmSCRATCH_UMSK)); - - dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n", - RREG32(mmCP_INT_CNTL_RING0)); - dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", - RREG32(mmRLC_LB_CNTL)); - dev_info(adev->dev, " RLC_CNTL=0x%08X\n", - RREG32(mmRLC_CNTL)); - dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n", - RREG32(mmRLC_CGCG_CGLS_CTRL)); - dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n", - RREG32(mmRLC_LB_CNTR_INIT)); - dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n", - RREG32(mmRLC_LB_CNTR_MAX)); - dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n", - RREG32(mmRLC_LB_INIT_CU_MASK)); - dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n", - RREG32(mmRLC_LB_PARAMS)); - dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", - RREG32(mmRLC_LB_CNTL)); - dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n", - RREG32(mmRLC_MC_CNTL)); - dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n", - RREG32(mmRLC_UCODE_CNTL)); - - mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { - vi_srbm_select(adev, 0, 0, 0, i); - dev_info(adev->dev, " VM %d:\n", i); - dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n", - RREG32(mmSH_MEM_CONFIG)); - dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n", - RREG32(mmSH_MEM_APE1_BASE)); - dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n", - RREG32(mmSH_MEM_APE1_LIMIT)); - dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n", - RREG32(mmSH_MEM_BASES)); - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0, srbm_soft_reset = 0; @@ -4108,7 +4866,6 @@ static int gfx_v8_0_soft_reset(void *handle) SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); if (grbm_soft_reset || srbm_soft_reset) { - gfx_v8_0_print_status((void *)adev); /* stop the rlc */ gfx_v8_0_rlc_stop(adev); @@ -4168,7 +4925,6 @@ static int gfx_v8_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - gfx_v8_0_print_status((void *)adev); } return 0; } @@ -4250,6 +5006,7 @@ static int gfx_v8_0_early_init(void *handle) gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); gfx_v8_0_set_gds_init(adev); + gfx_v8_0_set_rlc_funcs(adev); return 0; } @@ -4272,17 +5029,109 @@ static int gfx_v8_0_late_init(void *handle) if (r) return r; + amdgpu_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); + return 0; } +static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, temp; + + /* Send msg to SMU via Powerplay */ + amdgpu_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); + + if (enable) { + /* Enable static MGPG */ + temp = data = RREG32(mmRLC_PG_CNTL); + data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } else { + temp = data = RREG32(mmRLC_PG_CNTL); + data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } +} + +static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, temp; + + if (enable) { + /* Enable dynamic MGPG */ + temp = data = RREG32(mmRLC_PG_CNTL); + data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } else { + temp = data = RREG32(mmRLC_PG_CNTL); + data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } +} + +static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, temp; + + if (enable) { + /* Enable quick PG */ + temp = data = RREG32(mmRLC_PG_CNTL); + data |= 0x100000; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } else { + temp = data = RREG32(mmRLC_PG_CNTL); + data &= ~0x100000; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } +} + static int gfx_v8_0_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + return 0; + + switch (adev->asic_type) { + case CHIP_POLARIS11: + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) + polaris11_enable_gfx_static_mg_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) + polaris11_enable_gfx_dynamic_mg_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + else + polaris11_enable_gfx_quick_mg_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; } -static void fiji_send_serdes_cmd(struct amdgpu_device *adev, - uint32_t reg_addr, uint32_t cmd) +static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, + uint32_t reg_addr, uint32_t cmd) { uint32_t data; @@ -4292,7 +5141,8 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev, WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RREG32(mmRLC_SERDES_WR_CTRL); - data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | + if (adev->asic_type == CHIP_STONEY) + data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | RLC_SERDES_WR_CTRL__P1_SELECT_MASK | RLC_SERDES_WR_CTRL__P2_SELECT_MASK | @@ -4300,42 +5150,218 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev, RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | RLC_SERDES_WR_CTRL__POWER_UP_MASK | RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | - RLC_SERDES_WR_CTRL__BPM_DATA_MASK | - RLC_SERDES_WR_CTRL__REG_ADDR_MASK | RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); + else + data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | + RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | + RLC_SERDES_WR_CTRL__P1_SELECT_MASK | + RLC_SERDES_WR_CTRL__P2_SELECT_MASK | + RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | + RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | + RLC_SERDES_WR_CTRL__POWER_UP_MASK | + RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | + RLC_SERDES_WR_CTRL__BPM_DATA_MASK | + RLC_SERDES_WR_CTRL__REG_ADDR_MASK | + RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | - (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | - (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | - (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); + (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | + (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | + (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); WREG32(mmRLC_SERDES_WR_CTRL, data); } -static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +#define MSG_ENTER_RLC_SAFE_MODE 1 +#define MSG_EXIT_RLC_SAFE_MODE 0 + +#define RLC_GPR_REG2__REQ_MASK 0x00000001 +#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 +#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e + +static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data = 0; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) + return; + + if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || + (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG))) { + data |= RLC_GPR_REG2__REQ_MASK; + data &= ~RLC_GPR_REG2__MESSAGE_MASK; + data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); + WREG32(mmRLC_GPR_REG2, data); + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPM_STAT) & + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) + break; + udelay(1); + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) + break; + udelay(1); + } + adev->gfx.rlc.in_safe_mode = true; + } +} + +static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) + return; + + if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || + (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG))) { + data |= RLC_GPR_REG2__REQ_MASK; + data &= ~RLC_GPR_REG2__MESSAGE_MASK; + data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); + WREG32(mmRLC_GPR_REG2, data); + adev->gfx.rlc.in_safe_mode = false; + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) + break; + udelay(1); + } +} + +static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32(mmRLC_SAFE_MODE, data); + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPM_STAT) & + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) + break; + udelay(1); + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) + break; + udelay(1); + } + adev->gfx.rlc.in_safe_mode = true; + } +} + +static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data = 0; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { + if (adev->gfx.rlc.in_safe_mode) { + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + WREG32(mmRLC_SAFE_MODE, data); + adev->gfx.rlc.in_safe_mode = false; + } + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) + break; + udelay(1); + } +} + +static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + adev->gfx.rlc.in_safe_mode = true; +} + +static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + adev->gfx.rlc.in_safe_mode = false; +} + +static const struct amdgpu_rlc_funcs cz_rlc_funcs = { + .enter_safe_mode = cz_enter_rlc_safe_mode, + .exit_safe_mode = cz_exit_rlc_safe_mode +}; + +static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { + .enter_safe_mode = iceland_enter_rlc_safe_mode, + .exit_safe_mode = iceland_exit_rlc_safe_mode +}; + +static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { + .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, + .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode +}; + +static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t temp, data; + adev->gfx.rlc.funcs->enter_safe_mode(adev); + /* It is disabled by HW by default */ - if (enable) { - /* 1 - RLC memory Light sleep */ - temp = data = RREG32(mmRLC_MEM_SLP_CNTL); - data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; - if (temp != data) - WREG32(mmRLC_MEM_SLP_CNTL, data); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + /* 1 - RLC memory Light sleep */ + temp = data = RREG32(mmRLC_MEM_SLP_CNTL); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (temp != data) + WREG32(mmRLC_MEM_SLP_CNTL, data); + } - /* 2 - CP memory Light sleep */ - temp = data = RREG32(mmCP_MEM_SLP_CNTL); - data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; - if (temp != data) - WREG32(mmCP_MEM_SLP_CNTL, data); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + /* 2 - CP memory Light sleep */ + temp = data = RREG32(mmCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (temp != data) + WREG32(mmCP_MEM_SLP_CNTL, data); + } + } /* 3 - RLC_CGTT_MGCG_OVERRIDE */ temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); - data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | - RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | - RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | - RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); + if (adev->flags & AMD_IS_APU) + data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | + RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); + else + data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | + RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); if (temp != data) WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -4344,19 +5370,23 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* 5 - clear mgcg override */ - fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); - - /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ - temp = data = RREG32(mmCGTS_SM_CTRL_REG); - data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); - data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); - data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; - data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; - data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; - data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; - data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); - if (temp != data) - WREG32(mmCGTS_SM_CTRL_REG, data); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { + /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ + temp = data = RREG32(mmCGTS_SM_CTRL_REG); + data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); + data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); + data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; + data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; + if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && + (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) + data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; + data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; + data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); + if (temp != data) + WREG32(mmCGTS_SM_CTRL_REG, data); + } udelay(50); /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ @@ -4396,23 +5426,27 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* 6 - set mgcg override */ - fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); udelay(50); /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); } -static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t temp, temp1, data, data1; temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - if (enable) { + adev->gfx.rlc.funcs->enter_safe_mode(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ * Cmp_busy/GFX_Idle interrupts */ @@ -4427,25 +5461,29 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* 3 - clear cgcg override */ - fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); /* 4 - write cmd to set CGLS */ - fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); /* 5 - enable cgcg */ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; - /* enable cgls*/ - data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { + /* enable cgls*/ + data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; - temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); - data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; + temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); + data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; - if (temp1 != data1) - WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); + if (temp1 != data1) + WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); + } else { + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + } if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); @@ -4470,36 +5508,38 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* write cmd to Set CGCG Overrride */ - fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); /* write cmd to Clear CGLS */ - fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); /* disable cgcg, cgls should be disabled too. */ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); } -static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev, - bool enable) +static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) { if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) * === MGCG + MGLS + TS(CG/LS) === */ - fiji_update_medium_grain_clock_gating(adev, enable); - fiji_update_coarse_grain_clock_gating(adev, enable); + gfx_v8_0_update_medium_grain_clock_gating(adev, enable); + gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) * === CGCG + CGLS === */ - fiji_update_coarse_grain_clock_gating(adev, enable); - fiji_update_medium_grain_clock_gating(adev, enable); + gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); + gfx_v8_0_update_medium_grain_clock_gating(adev, enable); } return 0; } @@ -4511,8 +5551,10 @@ static int gfx_v8_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_FIJI: - fiji_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + case CHIP_CARRIZO: + case CHIP_STONEY: + gfx_v8_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); break; default: break; @@ -5033,7 +6075,6 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .is_idle = gfx_v8_0_is_idle, .wait_for_idle = gfx_v8_0_wait_for_idle, .soft_reset = gfx_v8_0_soft_reset, - .print_status = gfx_v8_0_print_status, .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, }; @@ -5112,6 +6153,22 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; } +static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_TOPAZ: + case CHIP_STONEY: + adev->gfx.rlc.funcs = &iceland_rlc_funcs; + break; + case CHIP_CARRIZO: + adev->gfx.rlc.funcs = &cz_rlc_funcs; + break; + default: + adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; + break; + } +} + static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ |